CommunityScrapers feat: Performer Image by scene cover scraper

Sets performer's selected scene cover as image for the performer.

Jun 25 '22 21:06 TgSeed

From what I understand, the use case is to take a selected scene cover image and use it as the profile image of an existing performer, right? The use of a custom title to store info for the byFragment part was pretty clever. The only issues I found are:

The GraphQL queries return all scene/performer info, while only a couple of fields are needed. We only need to update the image and change nothing else anyway.
For the byFragment scrape you use ids that depend on the order of the list you fetched in the byName scrape, and you repeat the whole process again. This can be avoided by using the scene_id directly instead of an id obtained by counting. Once you parse the scene_id, you can get the image with a single GraphQL query.

To make it clearer: use the GraphQL queries below instead

def getSceneScreenshot(scene_id):
    """Look up a single scene by id, requesting only its id and screenshot path.

    Sends a ``findScene`` GraphQL query through ``callGraphQL`` with
    ``scene_id`` as the ``id`` variable.

    Returns the ``findScene`` entry of the response, or ``None`` when
    ``callGraphQL`` yields nothing usable (a falsy result).
    """
    response = callGraphQL(
        """
    query FindScene($id: ID!, $checksum: String) {
        findScene(id: $id, checksum: $checksum) {
        id
        paths {
            screenshot
            }
        }
    }
    """,
        {"id": scene_id},
    )
    # A falsy response is reported to the caller as None.
    return response.get('findScene') if response else None

def getSceneIdByPerformerId(performer_id):
    """List scenes that include the given performer.

    Sends a ``findScenes`` GraphQL query through ``callGraphQL``, asking for
    the first page (20 entries, sorted by id descending) of scenes whose
    performers include ``performer_id``. Only id, title, path and the
    screenshot path of each scene are requested.

    Returns the ``findScenes`` entry of the response, or ``None`` when
    ``callGraphQL`` yields a falsy result.
    """
    gql = """
        query FindScenes($filter: FindFilterType, $scene_filter: SceneFilterType, $scene_ids: [Int!]) {
          findScenes(filter: $filter, scene_filter: $scene_filter, scene_ids: $scene_ids) {
            scenes {
                id
                title
                path
                paths {
                    screenshot
                    }
                }
          }
        }
    """
    paging = {
        "page": 1,
        "per_page": 20,
        "sort": "id",
        "direction": "DESC"
    }
    by_performer = {
        "performers": {
            # The id is sent as a string inside a one-element list.
            "value": [str(performer_id)],
            "modifier": "INCLUDES_ALL"
        }
    }

    response = callGraphQL(gql, {"filter": paging, "scene_filter": by_performer})
    if not response:
        return None
    return response.get('findScenes')


def getPerformersIdByName(performer_name):
    """Search performers by name, requesting only id, name and aliases.

    Sends a ``findPerformers`` GraphQL query through ``callGraphQL`` using
    ``performer_name`` as the ``q`` search term (first page, 20 entries,
    sorted by name ascending) and an empty performer filter.

    Returns the ``findPerformers`` entry of the response (which carries
    ``count`` and ``performers``), or ``None`` when ``callGraphQL`` yields a
    falsy result.
    """
    gql = """
        query FindPerformers($filter: FindFilterType, $performer_filter: PerformerFilterType) {
          findPerformers(filter: $filter, performer_filter: $performer_filter) {
            count
            performers {
              ...PerformerData
            }
          }
        }
        
        fragment PerformerData on Performer {
          id
          name
          aliases          
          }
    """

    search = {
        "q": performer_name,
        "page": 1,
        "per_page": 20,
        "sort": "name",
        "direction": "ASC"
    }

    response = callGraphQL(gql, {"filter": search, "performer_filter": {}})
    if not response:
        return None
    return response.get('findPerformers')

and adjust the code as shown below

import json
import re
import sys
from pathlib import Path

try:
    from py_common import log
    from py_common import graphql
except ModuleNotFoundError:
    print(
        "You need to download the folder 'py_common' from the community repo (CommunityScrapers/tree/master/scrapers/py_common)",
        file=sys.stderr
    )
    sys.exit()

# Maximum number of scene-title characters kept in each byName result entry.
MAX_TITLE_LENGTH = 25

def announce_result_to_stash(result):
    """Print ``result`` as JSON on stdout and terminate with exit code 0.

    The output shape depends on whether ``'query'`` is among the command-line
    arguments:

    * with ``'query'``: a JSON list is printed; a non-list result is wrapped
      in a one-element list and ``None`` becomes ``[]``.
    * without ``'query'``: a single JSON value is printed; a list is reduced
      to its first element (``{}`` when the list is empty) and ``None``
      becomes ``{}``.

    This function never returns: it always calls ``sys.exit(0)``.
    """
    wants_list = 'query' in sys.argv
    if result is None:
        result = [] if wants_list else {}

    if wants_list:
        payload = result if isinstance(result, list) else [result]
    elif isinstance(result, list):
        # Only the first entry is reported; an empty list prints "{}".
        payload = result[0] if len(result) > 0 else {}
    else:
        payload = result

    print(json.dumps(payload))
    sys.exit(0)

# Allows us to simply debug the script via CLI args: "-d <json>" supplies the
# fragment directly. "-d" must be followed by a value, otherwise we fall back
# to reading stdin instead of crashing with an IndexError.
if len(sys.argv) > 2 and '-d' in sys.argv[:-1]:
    stdin = sys.argv[sys.argv.index('-d') + 1]
else:
    stdin = sys.stdin.read()

# Only the "name" field of the incoming fragment is used.
frag = json.loads(stdin)
performer_name = frag.get("name")
if performer_name is None:
    # announce_result_to_stash() prints an empty result and exits the process.
    announce_result_to_stash(None)
else:
    performer_name = str(performer_name)

# byName results are titled "<performer> - Scene <scene id>. <scene title>";
# when such a title comes back in, the scene id can be read straight from it.
regex_obj_parse_name_with_scene = re.compile(r"(.*?) - Scene (\d+)\. (.*)", re.IGNORECASE | re.MULTILINE)

parsed_name = regex_obj_parse_name_with_scene.search(performer_name)


if parsed_name:
    # scene id already available, get scene directly
    performer_name = parsed_name.group(1)
    scene_id = parsed_name.group(2)
    log.debug(f"Using scene {scene_id} to get performer image")
    performer_scene = graphql.getSceneScreenshot(scene_id)
    # The lookup may return None (failed call / unknown scene) and the scene
    # may lack a screenshot; report an empty result instead of raising.
    screenshot = ((performer_scene or {}).get('paths') or {}).get('screenshot')
    if not screenshot:
        log.debug(f"No screenshot found for scene {scene_id}")
        announce_result_to_stash(None)
    performer = {'Name': performer_name, 'Image': screenshot}
    announce_result_to_stash(performer)
else:
    # search for scenes with the performer

    # first find the id of the performer
    performers_data = graphql.getPerformersIdByName(performer_name)
    performer_data = None
    wanted_name = performer_name.lower().strip()
    if performers_data is None or performers_data['count'] < 1:
        announce_result_to_stash(None)
    elif performers_data['count'] > 1:
        # Several hits: prefer an exact (case-insensitive) name match ...
        for performers_data_element in performers_data['performers']:
            if str(performers_data_element['name']).lower().strip() == wanted_name:
                performer_data = performers_data_element
                break
        if performer_data is None:
            # ... and otherwise take the first performer whose aliases contain the name
            for performers_data_element in performers_data['performers']:
                if wanted_name in str(performers_data_element['aliases']).lower().strip():
                    performer_data = performers_data_element
                    break
    else:
        performer_data = performers_data['performers'][0]

    if performer_data is None or 'id' not in performer_data or int(performer_data['id']) < 0:
        announce_result_to_stash(None)

    # get all scenes with the performer; a failed lookup yields no candidates
    performer_scenes = graphql.getSceneIdByPerformerId(performer_data['id'])
    scenes = (performer_scenes or {}).get('scenes') or []

    image_candidates = []
    # Candidates are only listed for the byName ('query') invocation.
    if 'query' in sys.argv:
        for scene in scenes:
            # Skip scenes without a usable screenshot path (missing, None or empty).
            if not (scene.get('paths') or {}).get('screenshot'):
                continue
            # Scenes without a title (None or empty string) are listed by file name.
            scene_title = scene.get("title") or Path(scene["path"]).name
            image_candidates.append(
                {'Name': f'{performer_name} - Scene {scene["id"]}. {scene_title[0:MAX_TITLE_LENGTH]}'}
            )
    announce_result_to_stash(image_candidates)

The byName query is a lot faster, as only the necessary fields are retrieved.
The byFragment query is almost instant (almost 100x faster), since the scene with the cover is retrieved directly by its scene id in a single query.
Scenes that don't have titles (it is possible to have some of those) use the filename for the listing in the byName query. The title part of the scene is clamped to 25 chars (might need tweaking), because if it is too long it messes up the result window.

Jul 06 '22 21:07 bnkai

From what i understand the use case is for an existing performer use a selected scene cover image as the profile image right?

Yes!

The use of a custom title to store info for the byFragment part was pretty clever.

Thanks

To make it more clear

Great job, thanks a lot. I applied your changes, but still kept the Images in the Performer dict, because that will help in the future once "[Feature] Multiple performer image #571" gets done. It also lets me see the image in the result list in my own fork/PR, while not causing issues for the original versions.

Sep 25 '22 21:09 TgSeed