firecrawl icon indicating copy to clipboard operation
firecrawl copied to clipboard

[Bug] Self hosted crawl supabase error in crawlStatusController

Open victorserbu2709 opened this issue 10 months ago • 6 comments

Describe the Bug I deployed the self-hosted version with Docker Compose. I tried to crawl a site. In the client I receive this error:

api-1                 | 2025-02-11 16:12:16 error [:]: Error occurred in request! (/v1/crawl/c5166295-b66f-4142-b274-171478b757d5) -- ID 222ff2e7fcb44b43bfe02aeb60ff9886 -- {"message":"Supabase client is not configured.","name":"Error","
stack":"Error: Supabase client is not configured.\n    at Proxy.<anonymous> (/app/dist/src/services/supabase.js:41:23)\n    at crawlStatusController (/app/dist/src/controllers/v1/crawl-status.js:154:14)\n    at process.processTicksAndRej
ections (node:internal/process/task_queues:95:5)"} {}

To Reproduce

from firecrawl import FirecrawlApp

# Self-hosted API endpoint (no API key needed when USE_DB_AUTHENTICATION=false).
app = FirecrawlApp(api_url="http://10.150.255.150:3002")

def crawl(url):
    """Crawl `url` (markdown format, limit 3 pages, max depth 1) and return
    the pages concatenated, each preceded by its URL and separated by rulers.

    Returns an error string if the crawl reports failure.
    """
    scrape_status = app.crawl_url(
        url,
        params={'scrapeOptions': {'formats': ['markdown']}, "limit": 3, "maxDepth": 1}
    )
    if not scrape_status['success']:
        return "Error scraping"  # typo fixed: was "scrapping"
    out = ""
    for content in scrape_status['data']:
        out += "url:" + content['metadata']['url'] + "\n"
        out += "-----------\n"
        out += content['markdown']
        out += "-----------\n"
    return out

# Bug fix: the original called crawl() with no arguments, which raises
# TypeError because `url` is required.
print(crawl("https://example.com"))

.env

NUM_WORKERS_PER_QUEUE=8
PORT=3002
HOST=0.0.0.0
REDIS_URL=redis://redis:6379 #for self-hosting using docker, use redis://redis:6379. For running locally, use redis://localhost:6379
REDIS_RATE_LIMIT_URL=redis://redis:6379 #for self-hosting using docker, use redis://redis:6379. For running locally, use redis://localhost:6379
PLAYWRIGHT_MICROSERVICE_URL=http://playwright-service:3000/scrape

## To turn on DB authentication, you need to set up supabase.
USE_DB_AUTHENTICATION=false

Expected Behavior to be able to use without supabase when self hosting

Logs

api-1                 | 2025-02-11 16:12:16 error [:]: Error occurred in request! (/v1/crawl/c5166295-b66f-4142-b274-171478b757d5) -- ID 222ff2e7fcb44b43bfe02aeb60ff9886 -- {"message":"Supabase client is not configured.","name":"Error","
stack":"Error: Supabase client is not configured.\n    at Proxy.<anonymous> (/app/dist/src/services/supabase.js:41:23)\n    at crawlStatusController (/app/dist/src/controllers/v1/crawl-status.js:154:14)\n    at process.processTicksAndRej
ections (node:internal/process/task_queues:95:5)"} {}

Additional Context If i modify getCrawlJobs to retry it works, eg

/**
 * Returns the job IDs associated with crawl `id`.
 *
 * Retries while the Redis set is still empty (the jobs may not have been
 * enqueued yet when the status endpoint is first polled), but gives up after
 * a bounded number of attempts instead of spinning forever — the original
 * `do/while (true)` hung the request indefinitely when no job ever appeared.
 *
 * @param id - the crawl ID
 * @returns job IDs stored under `crawl:<id>:jobs` (possibly empty after the
 *          retry budget is exhausted)
 */
export async function getCrawlJobs(id: string): Promise<string[]> {
  const retryDelayMs = 1000;
  const maxAttempts = 10; // bound the wait: ~10s total instead of an infinite loop

  let jobs: string[] = [];
  for (let attempt = 0; attempt < maxAttempts; attempt++) {
    jobs = await redisConnection.smembers("crawl:" + id + ":jobs");
    if (jobs.length >= 1) {
      break;
    }
    await new Promise((resolve) => setTimeout(resolve, retryDelayMs));
  }

  return jobs;
}

victorserbu2709 avatar Feb 11 '25 19:02 victorserbu2709

+1 on this issue, also experiencing it on the latest version of Firecrawl self-hosted

mikelmao avatar Feb 12 '25 22:02 mikelmao

The same error. It does not work even after adding the retry code.

davideuler avatar Feb 13 '25 16:02 davideuler

+1 on this issue, also facing the same issue on self-hosted. I tried the retry code, but it does not work either.

snicholas avatar Feb 14 '25 07:02 snicholas

This fixed it for me.

diff --git a/apps/api/src/controllers/v1/crawl-status.ts b/apps/api/src/controllers/v1/crawl-status.ts
index d43562b6..3ec1c61e 100644
--- a/apps/api/src/controllers/v1/crawl-status.ts
+++ b/apps/api/src/controllers/v1/crawl-status.ts
@@ -120,7 +120,7 @@ export async function crawlStatusController(
     return res.status(404).json({ success: false, error: "Job not found" });
   }
 
-  if (sc.team_id !== req.auth.team_id) {
+  if (process.env.USE_DB_AUTHENTICATION === "true" && sc.team_id !== req.auth.team_id) {
     return res.status(403).json({ success: false, error: "Forbidden" });
   }
 
@@ -244,7 +244,7 @@ export async function crawlStatusController(
 
   let totalCount = jobIDs.length;
 
-  if (totalCount === 0) {
+  if (totalCount === 0 && process.env.USE_DB_AUTHENTICATION === "true") {
     const x = await supabase_service
       .from('firecrawl_jobs')
       .select('*', { count: 'exact', head: true })

ghost avatar Feb 14 '25 18:02 ghost

This did not fix it for me @rakki194

Though I'm using the /extract endpoint.

mikelmao avatar Feb 18 '25 16:02 mikelmao

Adding support for bullMQ did the job though

extract-status.ts

import { Response } from "express";
import { supabaseGetJobsById } from "../../lib/supabase-jobs";
import { RequestWithAuth } from "./types";
import { getExtract, getExtractExpiry } from "../../lib/extract/extract-redis";
import { getExtractQueue } from "../../services/queue-service";

/**
 * GET handler for the extract-job status endpoint.
 *
 * Looks the extract job up in Redis; when it has completed, the result is
 * read from BullMQ first, falling back to Supabase only when DB
 * authentication is enabled. This keeps self-hosted deployments (which have
 * no Supabase configured) working.
 */
export async function extractStatusController(
  req: RequestWithAuth<{ jobId: string }, any, any>,
  res: Response,
) {
  const extract = await getExtract(req.params.jobId);

  if (!extract) {
    return res.status(404).json({
      success: false,
      error: "Extract job not found",
    });
  }

  let data: any[] = [];
  if (extract.status === "completed") {
    // Try to get the result from BullMQ first — this works without Supabase.
    const bullJob = await getExtractQueue().getJob(req.params.jobId);
    let jobData = bullJob?.returnvalue;

    // Fall back to Supabase only when BullMQ had no data AND DB
    // authentication is enabled. (The original comment claimed "or", but
    // the condition is — correctly — a conjunction.)
    if (!jobData && process.env.USE_DB_AUTHENTICATION === "true") {
      try {
        const supabaseData = await supabaseGetJobsById([req.params.jobId]);
        if (supabaseData && supabaseData.length > 0) {
          jobData = supabaseData[0].docs;
        }
      } catch (error) {
        // Best-effort: a Supabase outage must not turn a completed job
        // into a 500 — log and fall through with whatever we have.
        console.warn('Supabase data fetch failed:', error);
      }
    }

    if (jobData) {
      data = Array.isArray(jobData) ? jobData : [jobData];
    }
  }

  return res.status(200).json({
    success: extract.status !== "failed", // was the redundant `? false : true`
    data: data,
    status: extract.status,
    // `extract` is known non-null past the guard above, so no `?.` needed.
    error: extract.error ?? undefined,
    expiresAt: (await getExtractExpiry(req.params.jobId)).toISOString(),
    steps: extract.showSteps ? extract.steps : undefined,
    llmUsage: extract.showLLMUsage ? extract.llmUsage : undefined,
    sources: extract.showSources ? extract.sources : undefined,
  });
}

mikelmao avatar Feb 18 '25 17:02 mikelmao

Hi! This is now fixed in main.

mogery avatar Feb 20 '25 01:02 mogery