feat: Performer Image by scene cover scraper
Sets performer's selected scene cover as image for the performer.
From what I understand, the use case is: for an existing performer, use a selected scene cover image as the profile image, right? The use of a custom title to store info for the byFragment part was pretty clever. The only issues I found are:
- The graphql queries return all scene/performer info while only a couple of fields are needed. We only need to update the image and change nothing else anyway
- For the byFragment scrape, you use ids that depend on the order of the list you fetched from the byName scrape, and you repeat the whole process again. This can be avoided by using the scene_id directly instead of an id obtained by counting. Once you parse the scene_id, you can get the image with a single GraphQL query.
To make it clearer: use the GraphQL queries below instead
def getSceneScreenshot(scene_id):
    """Look up a single scene and return its id and screenshot path.

    Only ``id`` and ``paths.screenshot`` are requested from the server.

    Args:
        scene_id: Id of the scene, sent as the ``id`` query variable.

    Returns:
        The value stored under ``findScene`` in the ``callGraphQL`` result,
        or ``None`` when ``callGraphQL`` returns a falsy result.
    """
    find_scene_query = """
    query FindScene($id: ID!, $checksum: String) {
      findScene(id: $id, checksum: $checksum) {
        id
        paths {
          screenshot
        }
      }
    }
    """
    # $checksum is declared by the query but intentionally left unset here.
    response = callGraphQL(find_scene_query, {"id": scene_id})
    if not response:
        return None
    return response.get('findScene')
def getSceneIdByPerformerId(performer_id):
    """List scenes that include the given performer.

    Requests the first page (20 entries) sorted by scene id, descending,
    and asks only for ``id``, ``title``, ``path`` and ``paths.screenshot``
    of each scene.

    Args:
        performer_id: Id of the performer; converted to ``str`` for the filter.

    Returns:
        The value stored under ``findScenes`` in the ``callGraphQL`` result,
        or ``None`` when ``callGraphQL`` returns a falsy result.
    """
    find_scenes_query = """
    query FindScenes($filter: FindFilterType, $scene_filter: SceneFilterType, $scene_ids: [Int!]) {
      findScenes(filter: $filter, scene_filter: $scene_filter, scene_ids: $scene_ids) {
        scenes {
          id
          title
          path
          paths {
            screenshot
          }
        }
      }
    }
    """
    paging = {
        "page": 1,
        "per_page": 20,
        "sort": "id",
        "direction": "DESC",
    }
    performer_criterion = {
        "value": [str(performer_id)],
        "modifier": "INCLUDES_ALL",
    }
    response = callGraphQL(
        find_scenes_query,
        {
            "filter": paging,
            "scene_filter": {"performers": performer_criterion},
        },
    )
    return response.get('findScenes') if response else None
def getPerformersIdByName(performer_name):
    """Search performers by name, fetching only id, name and aliases.

    Sends ``performer_name`` as the ``q`` search term and requests the first
    page (20 entries) sorted by name, ascending, with an empty performer
    filter.

    Args:
        performer_name: Text used as the ``q`` search term.

    Returns:
        The value stored under ``findPerformers`` in the ``callGraphQL``
        result (the query asks for ``count`` and ``performers``), or ``None``
        when ``callGraphQL`` returns a falsy result.
    """
    find_performers_query = """
    query FindPerformers($filter: FindFilterType, $performer_filter: PerformerFilterType) {
      findPerformers(filter: $filter, performer_filter: $performer_filter) {
        count
        performers {
          ...PerformerData
        }
      }
    }
    fragment PerformerData on Performer {
      id
      name
      aliases
    }
    """
    search_filter = {
        "q": performer_name,
        "page": 1,
        "per_page": 20,
        "sort": "name",
        "direction": "ASC",
    }
    response = callGraphQL(
        find_performers_query,
        {"filter": search_filter, "performer_filter": {}},
    )
    if not response:
        return None
    return response.get('findPerformers')
and adjusting the code like below
import json
import re
import sys
from pathlib import Path
try:
from py_common import log
from py_common import graphql
except ModuleNotFoundError:
print(
"You need to download the folder 'py_common' from the community repo (CommunityScrapers/tree/master/scrapers/py_common)",
file=sys.stderr
)
sys.exit()
# Maximum number of scene-title characters kept in a search-result name;
# the title is sliced to this length when the candidate names are built.
MAX_TITLE_LENGTH = 25
def announce_result_to_stash(result):
    """Print ``result`` as JSON on stdout and exit with status 0.

    The output shape depends on whether ``'query'`` is among ``sys.argv``:

    * with ``query``: a JSON list is printed (a non-list result is wrapped
      in a one-element list);
    * without ``query``: a single JSON value is printed (the first element
      of a list result, or ``{}`` for an empty list).

    ``None`` is treated as an empty list (``query``) or empty object.

    Args:
        result: ``None``, a list, or any other JSON-serialisable value.

    This function never returns; it always ends with ``sys.exit(0)``.
    """
    wants_list = 'query' in sys.argv
    if result is None:
        result = [] if wants_list else {}

    got_list = isinstance(result, list)
    if wants_list:
        output = json.dumps(result if got_list else [result])
    elif not got_list:
        output = json.dumps(result)
    elif len(result) > 0:
        output = json.dumps(result[0])
    else:
        output = "{}"

    print(output)
    sys.exit(0)
# Script body.
#
# Reads a performer fragment (JSON with a "name" key) and either
#   * resolves a name of the form "<performer> - Scene <id>. <title>" to that
#     scene's screenshot and reports it as the performer image, or
#   * searches for the performer by name and, in "query" mode, lists one
#     candidate name per scene that has a screenshot.
#
# announce_result_to_stash() prints the result and always exits the process,
# so no statement placed after a call to it is executed.

# Allows us to simply debug the script via CLI args: "-d <json>" supplies the
# fragment on the command line instead of stdin. A trailing "-d" without a
# value falls back to stdin instead of raising IndexError.
if '-d' in sys.argv and sys.argv.index('-d') + 1 < len(sys.argv):
    stdin = sys.argv[sys.argv.index('-d') + 1]
else:
    stdin = sys.stdin.read()

frag = json.loads(stdin)
performer_name = frag.get("name")
if performer_name is None:
    announce_result_to_stash(None)
else:
    performer_name = str(performer_name)

# Matches the names this script itself generates in the search branch below.
regex_obj_parse_name_with_scene = re.compile(r"(.*?) - Scene (\d+)\. (.*)", re.IGNORECASE | re.MULTILINE)
parsed_name = regex_obj_parse_name_with_scene.search(performer_name)

if parsed_name:
    # scene id already available, get scene directly
    performer_name = parsed_name.group(1)
    scene_id = parsed_name.group(2)
    log.debug(f"Using scene {scene_id} to get performer image")
    performer_scene = graphql.getSceneScreenshot(scene_id)
    # The lookup may return None (failed call / unknown scene) and the scene
    # may have no screenshot; report an empty result instead of crashing.
    screenshot = ((performer_scene or {}).get('paths') or {}).get('screenshot')
    if not screenshot:
        log.debug(f"No screenshot found for scene {scene_id}")
        announce_result_to_stash(None)
    performer = {'Name': performer_name, 'Image': screenshot}
    announce_result_to_stash(performer)
else:
    # search for scenes with the performer
    # first find the id of the performer
    performers_data = graphql.getPerformersIdByName(performer_name)
    performer_data = None
    if performers_data is None or performers_data['count'] < 1:
        announce_result_to_stash(None)
    elif performers_data['count'] > 1:
        wanted_name = performer_name.lower().strip()
        found_performers = performers_data['performers']
        # Prefer an exact (case-insensitive) name match.
        performer_data = next(
            (p for p in found_performers if str(p['name']).lower().strip() == wanted_name),
            None,
        )
        if performer_data is None:
            # No match found by looking into the names, match with the aliases.
            # Missing aliases are treated as an empty string so a performer
            # without aliases is never matched via the text "none".
            performer_data = next(
                (p for p in found_performers if wanted_name in str(p.get('aliases') or '').lower()),
                None,
            )
    else:
        performer_data = performers_data['performers'][0]

    if performer_data is None or 'id' not in performer_data or int(performer_data['id']) < 0:
        announce_result_to_stash(None)

    # get all scenes with the performer
    performer_scenes = graphql.getSceneIdByPerformerId(performer_data['id'])
    if not performer_scenes:
        # The scene lookup failed; nothing to offer.
        announce_result_to_stash(None)

    # Candidates are only listed in "query" mode; otherwise the list stays empty.
    list_candidates = 'query' in sys.argv
    image_candidates = []
    for scene in performer_scenes.get('scenes') or []:
        if not (scene.get('paths') or {}).get('screenshot'):
            continue
        if not list_candidates:
            continue
        # Scenes without a title (None or empty) are listed by file name.
        scene_title = scene.get("title") or Path(scene.get("path") or "").name
        image_candidates.append(
            {'Name': f'{performer_name} - Scene {scene["id"]}. {scene_title[0:MAX_TITLE_LENGTH]}'}
        )
    announce_result_to_stash(image_candidates)
- the byName query is a lot faster, as only the necessary fields are retrieved
- the byFragment query is almost instant (almost 100x faster), since the scene with the cover is retrieved with a single query directly by its scene id
- scenes that don't have titles (it is possible to have some of those) use the filename for the listing in the byName query. The title part of the scene is clamped to 25 chars (might need tweaking), because if it is too long it messes up the result window
From what i understand the use case is for an existing performer use a selected scene cover image as the profile image right?
Yes!
The use of a custom title to store info for the byFragment part was pretty clever.
Thanks
To make it more clear
Great job, thanks a lot.
Applied your changes; however, I still kept the Images in the Performer dict because that will help in the future once [Feature] Multiple performer image #571 gets done. It also lets me see the image in the result list in my own fork/PR, while not causing issues for the original versions.