stagehand icon indicating copy to clipboard operation
stagehand copied to clipboard

[Feature] LLM Options

Open 729993031 opened this issue 1 year ago • 6 comments

Background

I wish to experiment with other LLM models like Tongyi or Gemini, but it's not feasible.

https://github.com/browserbase/stagehand/blob/fecd42e9c71f60f50ce14212f76b422603933ab8/lib/llm/LLMProvider.ts#L62

Aspiration

Provide a solution akin to browser-use, allowing the integration of custom models.

import { OpenAI } from "@langchain/openai";

const llm = new OpenAI({
  model: "gpt-3.5-turbo-instruct",
  temperature: 0,
  maxTokens: undefined,
  timeout: undefined,
  maxRetries: 2,
  apiKey: process.env.OPENAI_API_KEY,
  // other params...
});
await stagehand.init({llm});

Benefits

  1. Less code and lower maintenance complexity
  2. The token cost of GPT-4 is relatively high; in certain scenarios, cheaper models can be employed to achieve similar outcomes.

729993031 avatar Dec 15 '24 06:12 729993031

+1, this would also be nice if we could use with OpenRouter for many reasons (e.g. rate limits, consolidated billing, experimenting with different models to determine the output quality against cost)

conradkoh avatar Dec 24 '24 01:12 conradkoh

LiteLLM is another option. I second the request for gemini and other model options.

achris7 avatar Jan 08 '25 15:01 achris7

This would be a great addition. We’re price-sensitive, so it would help a lot.

cmaycumber avatar Jan 08 '25 19:01 cmaycumber

Up for this — I wish to use the DeepSeek API, which is cheaper than the current options and has roughly similar capabilities.

mugnimaestra avatar Jan 26 '25 04:01 mugnimaestra

I would also like to be able to use DeepSeek, as it's open source and can be downloaded. It should be one of the defaults together with the other two.

ahlstro avatar Jan 28 '25 16:01 ahlstro

You can already just do:

import { google } from "@ai-sdk/google";
import { z } from "zod";
import { Stagehand } from "@/dist";
import { AISdkClient } from "./external_clients/aisdk";
import StagehandConfig from "@/stagehand.config";

async function example() {
  const stagehand = new Stagehand({
    ...StagehandConfig,
    llmClient: new AISdkClient({
      model: google("gemini-1.5-flash-latest"),
    }),
  });

  await stagehand.init();
  await stagehand.page.goto("https://news.ycombinator.com");

  const headlines = await stagehand.page.extract({
    instruction: "Extract only 3 stories from the Hacker News homepage.",
    schema: z.object({
      stories: z
        .array(
          z.object({
            title: z.string(),
            url: z.string(),
            points: z.number(),
          }),
        )
        .length(3),
    }),
  });

  console.log(headlines);

  await stagehand.close();
}

(async () => {
  await example();
})();

import {
  CoreAssistantMessage,
  CoreMessage,
  CoreSystemMessage,
  CoreTool,
  CoreUserMessage,
  generateObject,
  generateText,
  ImagePart,
  LanguageModel,
  TextPart,
} from "ai";
import { ChatCompletion } from "openai/resources/chat/completions";
import { CreateChatCompletionOptions, LLMClient, AvailableModel } from "@/dist";

export class AISdkClient extends LLMClient {
  public type = "aisdk" as const;
  private model: LanguageModel;

  constructor({ model }: { model: LanguageModel }) {
    super(model.modelId as AvailableModel);
    this.model = model;
  }

  async createChatCompletion<T = ChatCompletion>({
    options,
  }: CreateChatCompletionOptions): Promise<T> {
    const formattedMessages: CoreMessage[] = options.messages.map((message) => {
      if (Array.isArray(message.content)) {
        if (message.role === "system") {
          const systemMessage: CoreSystemMessage = {
            role: "system",
            content: message.content
              .map((c) => ("text" in c ? c.text : ""))
              .join("\n"),
          };
          return systemMessage;
        }

        const contentParts = message.content.map((content) => {
          if ("image_url" in content) {
            const imageContent: ImagePart = {
              type: "image",
              image: content.image_url.url,
            };
            return imageContent;
          } else {
            const textContent: TextPart = {
              type: "text",
              text: content.text,
            };
            return textContent;
          }
        });

        if (message.role === "user") {
          const userMessage: CoreUserMessage = {
            role: "user",
            content: contentParts,
          };
          return userMessage;
        } else {
          const textOnlyParts = contentParts.map((part) => ({
            type: "text" as const,
            text: part.type === "image" ? "[Image]" : part.text,
          }));
          const assistantMessage: CoreAssistantMessage = {
            role: "assistant",
            content: textOnlyParts,
          };
          return assistantMessage;
        }
      }

      return {
        role: message.role,
        content: message.content,
      };
    });

    if (options.response_model) {
      const response = await generateObject({
        model: this.model,
        messages: formattedMessages,
        schema: options.response_model.schema,
      });

      return response.object;
    }

    const tools: Record<string, CoreTool> = {};

    for (const rawTool of options.tools) {
      tools[rawTool.name] = {
        description: rawTool.description,
        parameters: rawTool.parameters,
      };
    }

    const response = await generateText({
      model: this.model,
      messages: formattedMessages,
      tools,
    });

    return response as T;
  }
}

gruckion avatar Feb 24 '25 12:02 gruckion