[FEATURE] Add local fast embed support for apiless free embedding
Describe the feature you'd like: Add fastembed support (https://github.com/Anush008/fastembed-js) so Flowise can generate embeddings locally, without requiring an external API.
Additional context: N/A.
I am looking for this as well, but I would suggest using transformers.js - it uses ONNX as well, so it should be fast, too.
I had Claude barf out a first draft.
import { Embeddings, EmbeddingsParams } from '@langchain/core/embeddings';
import { ICommonObject, INode, INodeData, INodeParams } from '../../../src/Interface';
// Local stand-in types for transformers.js.
// NOTE(review): Transformers.js ships its own Pipeline/Tensor types; these
// local aliases avoid a hard compile-time dependency. `Pipeline` is left as
// `any` because the real type is only available after the dynamic import.
type Pipeline = any;
// Minimal shape of a Transformers.js output tensor, as consumed by
// tensorToArray() below (only `data` is actually read).
type Tensor = {
    dims: number[];
    type: string;
    data: Float32Array | Int8Array;
    size: number;
};
/**
 * Flowise node definition: generate embeddings locally with Transformers.js,
 * with no external API required.
 */
class TransformersJSEmbedding implements INode {
    label: string;
    name: string;
    version: number;
    type: string;
    icon: string;
    category: string;
    description: string;
    baseClasses: string[];
    inputs: INodeParams[];

    constructor() {
        this.label = 'TransformersJS Embeddings';
        this.name = 'transformersJSEmbeddings';
        this.version = 1.0;
        this.type = 'TransformersJS Embeddings';
        this.icon = 'transformers.png'; // NOTE(review): this icon asset must be added to the node's assets folder
        this.category = 'Embeddings';
        this.description = 'Generate embeddings locally using Transformers.js';
        this.baseClasses = [this.type, 'Embeddings'];
        this.inputs = [
            {
                label: 'Model Name',
                name: 'modelName',
                type: 'string',
                default: 'Xenova/all-MiniLM-L6-v2',
                placeholder: 'Xenova/all-MiniLM-L6-v2'
            },
            {
                label: 'Pooling Strategy',
                name: 'pooling',
                type: 'options',
                options: [
                    { label: 'Mean', value: 'mean' },
                    { label: 'Max', value: 'max' },
                    { label: 'Min', value: 'min' },
                    { label: 'None', value: 'none' }
                ],
                default: 'mean'
            },
            {
                label: 'Normalize Embeddings',
                name: 'normalize',
                type: 'boolean',
                default: true
            },
            {
                label: 'Quantize',
                name: 'quantize',
                type: 'boolean',
                default: false,
                description: 'Reduce the size of embeddings by quantizing them'
            },
            {
                label: 'Precision',
                name: 'precision',
                type: 'options',
                options: [
                    { label: 'Float32', value: 'float32' },
                    { label: 'Binary', value: 'binary' }
                ],
                default: 'float32',
                description: 'Precision to use when quantizing embeddings',
                additionalParams: true,
                optional: true
            }
        ];
    }

    /**
     * Build the embeddings instance from the node's configured inputs.
     *
     * @param nodeData - node configuration (modelName, pooling, normalize, quantize, precision)
     * @returns a TransformersJSEmbeddings instance usable wherever LangChain Embeddings are accepted
     */
    async init(nodeData: INodeData, _: string, options: ICommonObject): Promise<any> {
        const modelName = nodeData.inputs?.modelName as string;
        const pooling = nodeData.inputs?.pooling as string;
        const normalize = nodeData.inputs?.normalize as boolean;
        const quantize = nodeData.inputs?.quantize as boolean;
        const precision = nodeData.inputs?.precision as string;

        // Lazy dynamic import so the heavy Transformers.js/ONNX dependency is
        // only loaded when this node is actually used in a flow.
        // `const` (was `let`): the binding is never reassigned.
        const { pipeline } = await import('@xenova/transformers');

        return new TransformersJSEmbeddings({
            modelName,
            pipeline,
            pooling,
            normalize,
            quantize,
            precision
        });
    }
}
/**
 * Constructor parameters for TransformersJSEmbeddings, extending LangChain's
 * EmbeddingsParams with Transformers.js-specific options.
 */
interface TransformersJSEmbeddingsParams extends EmbeddingsParams {
    /** HuggingFace model id, e.g. 'Xenova/all-MiniLM-L6-v2'. */
    modelName: string;
    /** The `pipeline` factory function obtained from '@xenova/transformers'. */
    pipeline: any;
    /** Pooling strategy: 'mean' | 'max' | 'min' | 'none'. Defaults to 'mean'. */
    pooling?: string;
    /** Whether to L2-normalize embeddings. Defaults to true. */
    normalize?: boolean;
    /** Whether to quantize the output embeddings. Defaults to false. */
    quantize?: boolean;
    /** Quantization precision ('float32' | 'binary'); only used when quantize is true. */
    precision?: string;
}
/**
 * LangChain Embeddings implementation backed by a local Transformers.js
 * feature-extraction pipeline. The pipeline is created lazily on first use
 * and cached for all subsequent calls.
 */
class TransformersJSEmbeddings extends Embeddings {
    modelName: string;
    pipeline: any;
    pipelineInstance: Pipeline | null;
    pooling: string;
    normalize: boolean;
    quantize: boolean;
    precision: string;

    constructor(params: TransformersJSEmbeddingsParams) {
        super(params);
        this.modelName = params.modelName;
        this.pipeline = params.pipeline;
        this.pipelineInstance = null;
        this.pooling = params.pooling || 'mean';
        // Normalization is on unless explicitly disabled.
        this.normalize = params.normalize !== false;
        this.quantize = params.quantize === true;
        this.precision = params.precision || 'float32';
    }

    /** Lazily create (and cache) the feature-extraction pipeline. */
    async initialize(): Promise<void> {
        if (!this.pipelineInstance) {
            this.pipelineInstance = await this.pipeline('feature-extraction', this.modelName);
        }
    }

    /**
     * Convert a Transformers.js tensor to a plain numeric array.
     */
    private tensorToArray(tensor: Tensor): number[] {
        return Array.from(tensor.data);
    }

    /**
     * Embed a single piece of text.
     *
     * @param text - the input text
     * @returns the embedding vector
     */
    async embedQuery(text: string): Promise<number[]> {
        await this.initialize();

        // Build the complete options object up front instead of patching it
        // with @ts-ignore. Typed as a loose record because the exact pipeline
        // options shape depends on the Transformers.js version in use.
        const options: Record<string, unknown> = {
            pooling: this.pooling,
            normalize: this.normalize
        };
        if (this.quantize) {
            options.quantize = true;
            options.precision = this.precision;
        }

        const output = await this.pipelineInstance(text, options);
        return this.tensorToArray(output);
    }

    /**
     * Embed multiple documents, one at a time.
     * NOTE(review): sequential on purpose — a single local pipeline instance
     * likely cannot service concurrent calls faster; confirm before batching.
     *
     * @param documents - the texts to embed
     * @returns one embedding vector per input document
     */
    async embedDocuments(documents: string[]): Promise<number[][]> {
        const embeddings: number[][] = [];
        for (const document of documents) {
            embeddings.push(await this.embedQuery(document));
        }
        return embeddings;
    }
}
// Flowise's node loader expects a CommonJS export under `nodeClass`.
// NOTE(review): this file mixes ESM `import` with `module.exports` — confirm
// the project's TS->CJS build supports this, as other Flowise nodes do.
module.exports = { nodeClass: TransformersJSEmbedding };
I've no idea if that makes any sense at all. (I don't have time to learn or build this right now.)
I spent a while on this back then and ran into issues with musl support for ONNX (or something similar); I couldn't get it fully working outside of a Bun binary compile.
Haven't looked since then and moved on to other projects :p