transformers.js
Unsupported model type: whisper and CORS error
const transcribe = async (
  audio,
  model,
  multilingual,
  quantized,
  subtask,
  language,
) => {
  // TODO: use subtask and language

  // If multilingual is true, leave the model name unchanged; otherwise,
  // append ".en" to select the English-only variant of the model.
  const modelName = `Xenova/whisper-${model}${multilingual ? "" : ".en"}`;
  console.log("modelName", modelName);

  const p = AutomaticSpeechRecognitionPipelineFactory;

  // Check if the current model settings differ from the new settings
  if (p.model !== modelName || p.quantized !== quantized) {
    // Invalidate the model if different: update the model name and quantized status
    p.model = modelName;
    p.quantized = quantized;

    // If there is an existing instance of the ASR pipeline,
    // dispose of it (clean up resources) and set it to null,
    // indicating that it needs to be recreated.
    if (p.instance !== null) {
      (await p.getInstance()).dispose();
      p.instance = null;
    }
  }

  // Load the transcriber model
  let transcriber = await p.getInstance((data) => {
    self.postMessage(data);
  });

  const time_precision =
    transcriber.processor.feature_extractor.config.chunk_length /
    transcriber.model.config.max_source_positions;

  // Storage for chunks to be processed. Initialise with an empty chunk.
  let chunks_to_process = [
    {
      tokens: [],
      finalised: false,
    },
  ];

  // TODO: Storage for fully-processed and merged chunks
  // let decoded_chunks = [];

  function chunk_callback(chunk) {
    let last = chunks_to_process[chunks_to_process.length - 1];

    // Overwrite last chunk with new info
    Object.assign(last, chunk);
    last.finalised = true;

    // Create an empty chunk after, if it is not the last chunk
    if (!chunk.is_last) {
      chunks_to_process.push({
        tokens: [],
        finalised: false,
      });
    }
  }

  // Inject custom callback function to handle merging of chunks
  function callback_function(item) {
    let last = chunks_to_process[chunks_to_process.length - 1];

    // Update tokens of last chunk
    last.tokens = [...item[0].output_token_ids];

    // Merge text chunks
    // TODO: optimise so we don't have to decode all chunks every time
    let data = transcriber.tokenizer._decode_asr(chunks_to_process, {
      time_precision: time_precision,
      return_timestamps: true,
      force_full_sequences: false,
    });

    self.postMessage({
      status: "update",
      task: "automatic-speech-recognition",
      data: data,
    });
  }

  // Actually run transcription
  let output = await transcriber(audio, {
    // Greedy decoding
    top_k: 0,
    do_sample: false,

    // Sliding window
    chunk_length_s: 30,
    stride_length_s: 5,

    // Language and task
    language: language,
    task: subtask,

    // Return timestamps
    return_timestamps: true,
    force_full_sequences: false,

    // Callback functions
    callback_function: callback_function, // after each generation step
    chunk_callback: chunk_callback, // after each chunk is processed
  }).catch((error) => {
    self.postMessage({
      status: "error",
      task: "automatic-speech-recognition",
      data: error,
    });
    return null;
  });

  return output;
};
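For context, in the whisper-web example this transcribe function is invoked from the worker's message handler, roughly as follows (a sketch; the message field names mirror the parameter list above and are otherwise assumptions):

self.addEventListener("message", async (event) => {
  const message = event.data;

  // Run transcription; transcribe() posts "update"/"error" messages itself
  // and resolves to null on failure.
  const transcript = await transcribe(
    message.audio,
    message.model,
    message.multilingual,
    message.quantized,
    message.subtask,
    message.language,
  );
  if (transcript === null) return;

  // Send the final result back to the main thread
  self.postMessage({
    status: "complete",
    task: "automatic-speech-recognition",
    data: transcript,
  });
});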
static async getInstance(progress_callback = null) {
  if (this.instance === null) {
    this.instance = pipeline(this.task, this.model, {
      quantized: this.quantized,
      progress_callback,
    });
  }
  console.log("inside", this.instance);
  return this.instance;
}
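For reference, getInstance is a static method of a singleton factory class; a minimal sketch of the surrounding class, inferred from the snippets above (the static field defaults are assumptions):

class AutomaticSpeechRecognitionPipelineFactory {
  static task = "automatic-speech-recognition";
  static model = null;
  static quantized = null;
  // Holds the Promise returned by pipeline(), not the resolved pipeline itself
  static instance = null;

  // getInstance(...) as shown above
}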
When I console.log this.instance inside getInstance, it shows:
Promise {<pending>}
  [[Prototype]]: Promise
  [[PromiseState]]: "rejected"
  [[PromiseResult]]: Error: Unsupported model type: whisper
      at AutoModelForCTC.from_pretrained (webpack-internal:///./node_modules/.pnpm/@[email protected]/node_modules/@xenova/transformers/src/models.js:3550:19)
      at async eval (webpack-internal:///./node_modules/.pnpm/@[email protected]/node_modules/@xenova/transformers/src/pipelines.js:2087:33)
Hi there. I believe this is due to an issue we just fixed in v2.6.1 (related to minification). Could you please upgrade to v2.6.1 and try again? Thanks!
I just upgraded to v2.6.1, but the same error persists.
Could you please post information about your environment, e.g., OS, browser, build tools?
I am aware of a similar issue with users of create-react-app; if that is the case, please switch to a more up-to-date build tool like Vite.
OS: Windows 11
Browser: Chrome 117.0.5938.89
Build tools: create-next-app
build tools: create-next-app
Please try using Vite for your project. CRA has been removed from the React documentation. See here for more information.
We are using Next.js; Vite isn't a supported build tool for Next.js applications.
Oh my apologies, I misread "create-next-app" as "create-react-app". Sorry about that!
Could you post any information about your build process, such as whether any minification is taking place?
I am facing this locally, on the development server, without any minification.
Do you perhaps have a repo where I can try to reproduce this? Or could you post your next.config.js? Thanks!
We are currently working in a private repo. We can share it later if required (we'd need to prepare it first), but for now, here's the Next.js config:
/** @type {import('next').NextConfig} */
const nextConfig = {
  reactStrictMode: true,
  compress: false,
  images: {
    loader: "akamai",
    path: "",
  },
  compiler: {
    // Enables the styled-components SWC transform
    styledComponents: true,
  },
  // lessLoaderOptions: {
  //   lessOptions: {
  //     javascriptEnabled: true,
  //   },
  // },
  webpack(config) {
    config.module.rules.push({
      test: /\.svg$/,
      use: ["@svgr/webpack"],
    });
    return config;
  },
};

module.exports = nextConfig;
And which version of node / next.js / npm are you using?
next version: 13.4.13
node version: 16.15.0
pnpm version: 7.23.0
node version: 16.15.0
This might be the issue. In the docs, we recommend a minimum Node version of 18; 16.x has reached EOL. Could you try upgrading?
I tried to run a Whisper model via the automatic-speech-recognition pipeline and got the same error, caused by the unsupported AutoModelForCTC. This PR might have introduced the bug:
https://github.com/xenova/transformers.js/pull/220/files?file-filters%5B%5D=.js&show-viewed-files=true#diff-2f6b66f61363f7b45e1b165f81d3ce15b3768da43e40410085aee8bd8666a629R1739
@szprytny Could you provide more information about your environment? Are you using the latest version of Transformers.js?
I have
node 18.9.1
transformers.js 2.6.2
When I removed the declaration of AutoModelForCTC from https://github.com/xenova/transformers.js/blob/main/src/pipelines.js#L1953, the pipeline went further. I then got the error Unsupported model IR version: 9, which I was able to get past by overriding onnxruntime-node in my project's package.json.
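A pnpm override along these lines is presumably what is meant (a sketch; the version number is only an example, pick one whose runtime supports your model's IR version):

// package.json (illustrative)
{
  "pnpm": {
    "overrides": {
      "onnxruntime-node": "1.16.0"
    }
  }
}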
And which bundler are you using? I am aware of issues with create-react-app. I haven't had any problems with vite, for example.
I got error Unsupported model IR version: 9
Yes, this is because you exported with onnx >= 1.14, and Transformers.js still uses onnxruntime-web v1.14 (which only supports a max IR version of 8). See here for an issue I filed a while ago.
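If you want to check which IR version a given .onnx file declares without leaving Node, a quick-and-dirty sketch (this assumes ir_version, field 1 of the ONNX ModelProto, is serialized first, which the reference protobuf implementation does; run as node check-ir.mjs model.onnx):

import { readFileSync } from "fs";

// ModelProto.ir_version is field 1, wire type 0 (varint),
// so a standard export starts with the tag byte 0x08.
const buf = readFileSync(process.argv[2]);
if (buf[0] !== 0x08) {
  throw new Error("File does not start with an ir_version field");
}

// Decode the varint that follows the tag byte.
let ir = 0n, shift = 0n;
for (let i = 1; ; i++) {
  const b = BigInt(buf[i]);
  ir |= (b & 0x7fn) << shift;
  if ((b & 0x80n) === 0n) break;
  shift += 7n;
}
console.log("IR version:", ir.toString()); // 8 works with onnxruntime-web 1.14; 9+ does not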
I did not run it as a web app; I just tried to do inference using a plain Node script run with npx tsx.
@szprytny Can you provide some sample code which resulted in this error?
It seems the error Unsupported model type: whisper is misleading, as the real problem was that my model has a newer IR version. That underlying error is not handled well enough, and loading ends up calling from_pretrained for the AutoModelForCTC class in the loadItems function.
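In other words, the loading logic appears to behave roughly like this (a simplified sketch of the behaviour being described, not the actual library code; model_id and options are placeholders):

let lastError;
for (const cls of [AutoModelForSpeechSeq2Seq, AutoModelForCTC]) {
  try {
    return await cls.from_pretrained(model_id, options);
  } catch (e) {
    // The real failure (e.g. "Unsupported model IR version: 9") from the
    // first class is overwritten by the later attempt's error...
    lastError = e;
  }
}
// ...so what surfaces is AutoModelForCTC's "Unsupported model type: whisper".
throw lastError;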
Here is the script I used to run it:
import { WaveFile } from "wavefile";
import path from "path";
import { readFileSync } from "fs";
import { pipeline, env } from "@xenova/transformers";

env.localModelPath = "c:/model/onnx/";

const prepareAudio = (filePath: string): Float64Array => {
  const wav = new WaveFile(readFileSync(path.normalize(filePath)));
  wav.toBitDepth("32f");
  wav.toSampleRate(16000);
  let audioData = wav.getSamples();
  return audioData;
};

const test = async () => {
  let pipe = await pipeline("automatic-speech-recognition", "shmisper", {
    local_files_only: true,
  });
  let out = await pipe(prepareAudio("c:/content/01_0.wav"));
  console.log(out);
};

test();
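One note on the script above: wav.getSamples() returns an array of per-channel buffers for multi-channel files, so prepareAudio as written assumes a mono WAV. The Transformers.js documentation handles stereo input by merging the channels, along these lines:

let audioData = wav.getSamples();
if (Array.isArray(audioData)) {
  if (audioData.length > 1) {
    const SCALING_FACTOR = Math.sqrt(2);
    // Merge channels (into first channel to save memory)
    for (let i = 0; i < audioData[0].length; ++i) {
      audioData[0][i] = SCALING_FACTOR * (audioData[0][i] + audioData[1][i]) / 2;
    }
  }
  // Select first channel
  audioData = audioData[0];
}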
I see... Indeed, that error message would be quite misleading. Could you try downgrading to onnx==1.13.1 and re-exporting your model? See https://github.com/xenova/transformers.js/blob/main/scripts/requirements.txt for the other recommended versions.
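For example, from a checkout of the transformers.js repo (the model ID below is just a placeholder):

pip install -r scripts/requirements.txt    # pins onnx==1.13.1, among others
python -m scripts.convert --quantize --model_id openai/whisper-base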
I have the exact same problem. I changed the onnx version to 1.13.1; the small model works, but not the medium and large-v2 models.
Having the same issue as the main thread:
- OSX 14.1 Sonoma
- M2 chip
- Node 18.12.1
- transformers.js 2.10.1
You mentioned here that we should use onnx==1.13.1 per your conversion scripts. Does Hugging Face's Optimum conversion script (i.e., optimum-cli export onnx --model model_id) also work with your script? I noticed it doesn't move all the ONNX files into their own folder (something I can do manually), but is the process of exporting to ONNX the same? If so, is optimum-cli using a different version of onnx than what your repo uses?
Yes, we use optimum behind the scenes. The purpose of the conversion script is to also perform quantization afterwards, but if this is not necessary for your use-case, you can use optimum directly and just structure the repo as the other transformers.js models on the HF Hub.
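For reference, the expected repo layout (mirroring existing Xenova/whisper-* repos on the Hub; quantized variants add a _quantized suffix) looks roughly like:

my-whisper-model/
├── config.json
├── generation_config.json
├── preprocessor_config.json
├── tokenizer.json              (plus the other tokenizer files)
└── onnx/
    ├── encoder_model.onnx
    └── decoder_model_merged.onnx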
I converted the whisper-base model to ONNX using optimum-cli, moved the model files into the local onnx folder, and verified my environment had the same module versions as your requirements.txt. When I tried to run my inference script (Node.js), I still ended up with errors: output.txt
@xenova I could reproduce this error on the v3 branch with the whisper-word-timestamps example. If I go to worker.js and change the model_id from onnx-community/whisper-base_timestamped to Xenova/whisper-large-v3, I get the error: Unsupported model type: whisper
Same here, when trying to use distil-whisper/distil-medium.en on Whisper WebGPU: Unsupported model type: whisper ("@huggingface/transformers": "^3.0.0-alpha.9")