transformers.js
`TypeError: fetch failed at Object.fetch` (caused by `read ECONNRESET`) during model download
System Info
Transformers.js: 2.15.0 Electron: 28.2.1 Vite: 4.5.2
App here: https://github.com/reorproject/reor
Environment/Platform
- [ ] Website/web-app
- [ ] Browser extension
- [ ] Server-side (e.g., Node.js, Deno, Bun)
- [X] Desktop app (e.g., Electron)
- [ ] Other (e.g., VSCode extension)
Description
Users are reporting failures during model download; the app surfaces the following error:
Failed to initialize pipeline TypeError: fetch failed at Object.fetch (node:internal/deps/undici/undici:12293:11) at process.processTicksAndRejections (node:internal/process/task_queues:95:5) at async getModelFile (file:///*****/@[email protected]/node_modules/@xenova/transformers/src/utils/hub.js:471:24) at async getModelJSON (file:///*****/@[email protected]/node_modules/@xenova/transformers/src/utils/hub.js:575:18) at async Promise.all (index 0) at async loadTokenizer (file:///*****/@[email protected]/node_modules/@xenova/transformers/src/tokenizers.js:61:18) at async AutoTokenizer.from_pretrained (file:///*****/@[email protected]/node_modules/@xenova/transformers/src/tokenizers.js:4343:50) at async Promise.all (index 0) at async loadItems (file:///*****/@[email protected]/node_modules/@xenova/transformers/src/pipelines.js:3116:5) at async pipeline (file:///*****/@[email protected]/node_modules/@xenova/transformers/src/pipelines.js:3056:21) { cause: Error: read ECONNRESET at TLSWrap.onStreamRead (node:internal/stream_base_commons:217:20) { errno: -4077, code: 'ECONNRESET', syscall: 'read' } } Error during file indexing: TypeError: fetch failed at Object.fetch (node:internal/deps/undici/undici:12293:11) at process.processTicksAndRejections (node:internal/process/task_queues:95:5)
The issue tracked in the app's project is here: https://github.com/reorproject/reor/issues/52
Reproduction
The following code is the main part that deals with transformers.js:
/**
 * Builds an embedding function backed by a transformers.js
 * feature-extraction pipeline and its matching tokenizer.
 *
 * @param repoName     Hugging Face repo id of the embedding model.
 * @param sourceColumn Name of the column the embeddings are derived from.
 * @returns An object exposing `embed` (batch -> vectors) and `tokenize`.
 * @throws Error when the pipeline or tokenizer cannot be initialized
 *         (e.g. the model download fails with a network error).
 */
export async function createEmbeddingFunction(
  repoName: string,
  sourceColumn: string
): Promise<EnhancedEmbeddingFunction<string | number[]>> {
  let pipe: Pipeline;
  let tokenizer: PreTrainedTokenizer;
  let contextLength: number;

  try {
    // Lazily load transformers.js so the dependency is only paid for here.
    const { pipeline, env, AutoTokenizer } = await import(
      "@xenova/transformers"
    );

    // Cache downloaded model files under Electron's per-user data directory.
    const cacheDir = path.join(app.getPath("userData"), "models", "embeddings");
    env.cacheDir = cacheDir;

    // Initialize the feature-extraction pipeline; wrap failures with repo context.
    try {
      pipe = (await pipeline("feature-extraction", repoName, {
        cache_dir: cacheDir,
      })) as Pipeline;
      // NOTE(review): hidden_size is the embedding dimension; using it as a
      // "context length" looks suspect — confirm against the model config.
      contextLength = pipe.model.config.hidden_size;
    } catch (error) {
      throw new Error(
        `Pipeline initialization failed for repo '${repoName}': ${errorToString(
          error
        )}`
      );
    }

    // Load the tokenizer for the same repo from the same cache.
    try {
      tokenizer = await AutoTokenizer.from_pretrained(repoName, {
        cache_dir: cacheDir,
      });
    } catch (error) {
      throw new Error(
        `Tokenizer initialization failed for repo '${repoName}': ${errorToString(
          error
        )}`
      );
    }
  } catch (error) {
    // Log and rethrow with a uniform prefix so callers see one message shape.
    const message = `Resource initialization failed: ${errorToString(error)}`;
    console.error(message);
    throw new Error(message);
  }

  return {
    name: repoName,
    contextLength,
    sourceColumn,

    // Embeds a batch of strings; rows that are already numeric vectors are
    // passed through unchanged.
    embed: async (batch: (string | number[])[]): Promise<number[][]> => {
      if (batch.length === 0 || batch[0].length === 0) {
        return [];
      }
      if (typeof batch[0][0] === "number") {
        // Batch already contains precomputed embeddings.
        return batch as number[][];
      }
      if (!pipe) {
        throw new Error("Pipeline not initialized");
      }

      // Embed a single entry with mean pooling and L2 normalization.
      const embedOne = async (text: string | number[]): Promise<number[]> => {
        try {
          const res = await pipe(text, {
            pooling: "mean",
            normalize: true,
          });
          return Array.from(res.data);
        } catch (error) {
          throw new Error(
            `Embedding process failed for text: ${errorToString(error)}`
          );
        }
      };

      try {
        // `return await` keeps rejections inside this try block.
        return await Promise.all(batch.map(embedOne));
      } catch (error) {
        const message = `Embedding batch process failed: ${errorToString(
          error
        )}`;
        console.error(message);
        throw new Error(message);
      }
    },

    // Tokenizes each entry of the batch with the loaded tokenizer.
    tokenize: (data: (string | number[])[]): string[] => {
      if (!tokenizer) {
        throw new Error("Tokenizer not initialized");
      }
      try {
        return data.map((text) => {
          try {
            return tokenizer(text);
          } catch (error) {
            throw new Error(
              `Tokenization process failed for text: ${errorToString(error)}`
            );
          }
        });
      } catch (error) {
        const message = `Tokenization batch process failed: ${errorToString(
          error
        )}`;
        console.error(message);
        throw new Error(message);
      }
    },
  };
}