PyMovieDb
PyMovieDb copied to clipboard
Include trailers
I was unable to parse it in a better way, but this does the job:
diff --git i/PyMovieDb/imdb.py w/PyMovieDb/imdb.py
index 6eecd57..4bd3da1 100644
--- i/PyMovieDb/imdb.py
+++ w/PyMovieDb/imdb.py
@@ -144,6 +144,25 @@ class IMDB:
# invalid char(s) is/are not in description/trailer/reviewBody schema
return self.NA
+ trailers = []
+ trailer_url = result.get("trailer", {"embedUrl": None}).get("embedUrl")
+ if trailer_url:
+ try:
+ response = self.session.get(trailer_url)
+ #print(response.html.text)
+ #result = response.html.xpath("//video")[0].text
+ s = response.html.text
+ import re
+ m = re.findall(r'"url":"https://imdb-video.media-imdb.com[^"]+mp4[^"]+Expires[^"]+"', s)
+ for t_url in m:
+ t = {}
+ t["url"] = json.loads("{"+m[0]+"}").get("url")
+ trailers.append(t)
+ #result = f"""{result}"""
+ except IndexError:
+ t = {}
+ t["url"] = trailer_url
+ trailers.append(t)
output = {
"type": result.get('@type'),
"name": result.get('name'),
@@ -176,6 +195,10 @@ class IMDB:
"datePublished": result.get("datePublished"),
"keywords": result.get("keywords"),
"duration": result.get("duration"),
+ "trailer": {
+ "thumbnail": result.get("trailer", {"thumbnailUrl": None}).get("thumbnailUrl"),
+ "links": trailers
+ },
"actor": [
{"name": actor.get("name"), "url": actor.get("url")} for actor in result.get("actor", [])
],
Trailers have a deeper structure that includes the stream quality, like:
{
"url": "https://imdb-video.media-imdb.com/whatever",
"__typename":"PlaybackURL"
},
{
"displayName": {
"value":"SD",
"language":"en-US",
"__typename":"LocalizedString"
},
"videoMimeType":"MP4",
"videoDefinition":"DEF_SD"
}
but I was unable to find a way to parse that with XPath expressions.
The final result should look like:
"trailers": [
{
"definition": "480p",
"thumbnail": url,
"url": url
},
{
"definition": "SD",
"thumbnail": url,
"url": url
}
]
while right now it is:
"trailer": {
"thumbnail": url,
"links": [
{
"url": url
},
{
"url": url
}, ...
]
}