feat: replace Silero VAD with TEN-VAD running inside the AudioWorklet

TEN-VAD (official TEN-framework/ten-vad WASM, no npm dependency) replaces
@ricky0123/vad-web. The WASM module is compiled once on the main thread and
passed to the AudioWorklet via processorOptions, where it is instantiated
synchronously and called every 16 ms with no IPC round-trip.

- Add public/vad/ten_vad.{wasm,js} from official upstream lib/Web/
- NoiseGateProcessor: TenVADRuntime class wraps the Emscripten WASM with
  minimal import stubs; 3:1 decimation accumulates 256 Int16 samples @
  16 kHz per hop; hysteresis controls vadGateOpen directly in-worklet
- NoiseGateTransformer: fetch+compile WASM once (module-level cache),
  pass WebAssembly.Module via processorOptions; remove setVADOpen()
- Publisher: remove all SileroVADGate lifecycle (init/start/stop/destroy,
  rawMicTrack capture); VAD params folded into single combineLatest;
  fix transient suppressor standalone attach (shouldAttach now includes
  transientSuppressorEnabled)
- vite.config.ts: remove viteStaticCopy, serveVadAssets plugin, and all
  vad-web/onnxruntime copy targets (public/vad/ served automatically)
- Remove @ricky0123/vad-web, onnxruntime-web deps and resolution

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
mk
2026-03-24 07:43:52 -03:00
parent dbd4eef899
commit dc1f30b84f
10 changed files with 370 additions and 492 deletions

View File

@@ -132,13 +132,11 @@
"vite-plugin-generate-file": "^0.3.0", "vite-plugin-generate-file": "^0.3.0",
"vite-plugin-html": "^3.2.2", "vite-plugin-html": "^3.2.2",
"vite-plugin-node-stdlib-browser": "^0.2.1", "vite-plugin-node-stdlib-browser": "^0.2.1",
"vite-plugin-static-copy": "^4.0.0",
"vite-plugin-svgr": "^4.0.0", "vite-plugin-svgr": "^4.0.0",
"vitest": "^4.0.18", "vitest": "^4.0.18",
"vitest-axe": "^1.0.0-pre.3" "vitest-axe": "^1.0.0-pre.3"
}, },
"resolutions": { "resolutions": {
"onnxruntime-web": "1.18.0",
"@livekit/components-core/rxjs": "^7.8.1", "@livekit/components-core/rxjs": "^7.8.1",
"@livekit/track-processors/@mediapipe/tasks-vision": "^0.10.18", "@livekit/track-processors/@mediapipe/tasks-vision": "^0.10.18",
"minimatch": "^10.2.3", "minimatch": "^10.2.3",
@@ -147,9 +145,5 @@
"qs": "^6.14.1", "qs": "^6.14.1",
"js-yaml": "^4.1.1" "js-yaml": "^4.1.1"
}, },
"packageManager": "yarn@4.7.0", "packageManager": "yarn@4.7.0"
"dependencies": {
"@ricky0123/vad-web": "^0.0.30",
"onnxruntime-web": "1.18.0"
}
} }

30
public/vad/ten_vad.js Normal file
View File

@@ -0,0 +1,30 @@
// Emscripten-generated ES-module loader for ten_vad.wasm (minified output; the
// body below is reproduced unchanged and should not be edited by hand).
//
// createVADModule(overrides?) returns the module's `ready` promise, which
// resolves with the module object after the WASM instance has been created and
// its static constructors (asm.h) have run.
//
// Mapping of readable names to the minified WASM exports, as wired up below:
//   memory              -> asm.g
//   ___wasm_call_ctors  -> asm.h
//   _malloc             -> asm.i
//   _free               -> asm.j
//   _ten_vad_create     -> asm.k
//   _ten_vad_process    -> asm.l
//   _ten_vad_destroy    -> asm.m
//   _ten_vad_get_version-> asm.n
//
// The WASM is instantiated with one import namespace, `a`, holding six
// functions: a = abort, b = returns 70, c = grow the heap to the requested size,
// d = gather iovec bytes, print completed lines (fd 1 -> print, otherwise
// printErr) and store the byte count, e = returns 52, f = memory copy via
// copyWithin.
//
// Overrides read from the passed-in object include wasmBinary, locateFile,
// instantiateWasm, print, printErr, onAbort, preRun/postRun/preInit and
// onRuntimeInitialized. Without locateFile the .wasm URL is resolved relative
// to import.meta.url.
var createVADModule = (() => {
var _scriptDir = import.meta.url;
return (
function(createVADModule) {
createVADModule = createVADModule || {};
var a;a||(a=typeof createVADModule !== 'undefined' ? createVADModule : {});var k,l;a.ready=new Promise(function(b,c){k=b;l=c});var p=Object.assign({},a),r="object"==typeof window,u="function"==typeof importScripts,v="",w;
if(r||u)u?v=self.location.href:"undefined"!=typeof document&&document.currentScript&&(v=document.currentScript.src),_scriptDir&&(v=_scriptDir),0!==v.indexOf("blob:")?v=v.substr(0,v.replace(/[?#].*/,"").lastIndexOf("/")+1):v="",u&&(w=b=>{var c=new XMLHttpRequest;c.open("GET",b,!1);c.responseType="arraybuffer";c.send(null);return new Uint8Array(c.response)});var aa=a.print||console.log.bind(console),x=a.printErr||console.warn.bind(console);Object.assign(a,p);p=null;var y;a.wasmBinary&&(y=a.wasmBinary);
var noExitRuntime=a.noExitRuntime||!0;"object"!=typeof WebAssembly&&z("no native wasm support detected");var A,B=!1,C="undefined"!=typeof TextDecoder?new TextDecoder("utf8"):void 0,D,E,F;function J(){var b=A.buffer;D=b;a.HEAP8=new Int8Array(b);a.HEAP16=new Int16Array(b);a.HEAP32=new Int32Array(b);a.HEAPU8=E=new Uint8Array(b);a.HEAPU16=new Uint16Array(b);a.HEAPU32=F=new Uint32Array(b);a.HEAPF32=new Float32Array(b);a.HEAPF64=new Float64Array(b)}var K=[],L=[],M=[];
function ba(){var b=a.preRun.shift();K.unshift(b)}var N=0,O=null,P=null;function z(b){if(a.onAbort)a.onAbort(b);b="Aborted("+b+")";x(b);B=!0;b=new WebAssembly.RuntimeError(b+". Build with -sASSERTIONS for more info.");l(b);throw b;}function Q(){return R.startsWith("data:application/octet-stream;base64,")}var R;if(a.locateFile){if(R="ten_vad.wasm",!Q()){var S=R;R=a.locateFile?a.locateFile(S,v):v+S}}else R=(new URL("ten_vad.wasm",import.meta.url)).href;
function T(){var b=R;try{if(b==R&&y)return new Uint8Array(y);if(w)return w(b);throw"both async and sync fetching of the wasm failed";}catch(c){z(c)}}function ca(){return y||!r&&!u||"function"!=typeof fetch?Promise.resolve().then(function(){return T()}):fetch(R,{credentials:"same-origin"}).then(function(b){if(!b.ok)throw"failed to load wasm binary file at '"+R+"'";return b.arrayBuffer()}).catch(function(){return T()})}function U(b){for(;0<b.length;)b.shift()(a)}
var da=[null,[],[]],ea={a:function(){z("")},f:function(b,c,m){E.copyWithin(b,c,c+m)},c:function(b){var c=E.length;b>>>=0;if(2147483648<b)return!1;for(var m=1;4>=m;m*=2){var h=c*(1+.2/m);h=Math.min(h,b+100663296);var d=Math;h=Math.max(b,h);d=d.min.call(d,2147483648,h+(65536-h%65536)%65536);a:{try{A.grow(d-D.byteLength+65535>>>16);J();var e=1;break a}catch(W){}e=void 0}if(e)return!0}return!1},e:function(){return 52},b:function(){return 70},d:function(b,c,m,h){for(var d=0,e=0;e<m;e++){var W=F[c>>2],
X=F[c+4>>2];c+=8;for(var G=0;G<X;G++){var f=E[W+G],H=da[b];if(0===f||10===f){f=H;for(var n=0,q=n+NaN,t=n;f[t]&&!(t>=q);)++t;if(16<t-n&&f.buffer&&C)f=C.decode(f.subarray(n,t));else{for(q="";n<t;){var g=f[n++];if(g&128){var I=f[n++]&63;if(192==(g&224))q+=String.fromCharCode((g&31)<<6|I);else{var Y=f[n++]&63;g=224==(g&240)?(g&15)<<12|I<<6|Y:(g&7)<<18|I<<12|Y<<6|f[n++]&63;65536>g?q+=String.fromCharCode(g):(g-=65536,q+=String.fromCharCode(55296|g>>10,56320|g&1023))}}else q+=String.fromCharCode(g)}f=q}(1===
b?aa:x)(f);H.length=0}else H.push(f)}d+=X}F[h>>2]=d;return 0}};
(function(){function b(d){a.asm=d.exports;A=a.asm.g;J();L.unshift(a.asm.h);N--;a.monitorRunDependencies&&a.monitorRunDependencies(N);0==N&&(null!==O&&(clearInterval(O),O=null),P&&(d=P,P=null,d()))}function c(d){b(d.instance)}function m(d){return ca().then(function(e){return WebAssembly.instantiate(e,h)}).then(function(e){return e}).then(d,function(e){x("failed to asynchronously prepare wasm: "+e);z(e)})}var h={a:ea};N++;a.monitorRunDependencies&&a.monitorRunDependencies(N);if(a.instantiateWasm)try{return a.instantiateWasm(h,
b)}catch(d){x("Module.instantiateWasm callback failed with error: "+d),l(d)}(function(){return y||"function"!=typeof WebAssembly.instantiateStreaming||Q()||"function"!=typeof fetch?m(c):fetch(R,{credentials:"same-origin"}).then(function(d){return WebAssembly.instantiateStreaming(d,h).then(c,function(e){x("wasm streaming compile failed: "+e);x("falling back to ArrayBuffer instantiation");return m(c)})})})().catch(l);return{}})();
a.___wasm_call_ctors=function(){return(a.___wasm_call_ctors=a.asm.h).apply(null,arguments)};a._malloc=function(){return(a._malloc=a.asm.i).apply(null,arguments)};a._free=function(){return(a._free=a.asm.j).apply(null,arguments)};a._ten_vad_create=function(){return(a._ten_vad_create=a.asm.k).apply(null,arguments)};a._ten_vad_process=function(){return(a._ten_vad_process=a.asm.l).apply(null,arguments)};a._ten_vad_destroy=function(){return(a._ten_vad_destroy=a.asm.m).apply(null,arguments)};
a._ten_vad_get_version=function(){return(a._ten_vad_get_version=a.asm.n).apply(null,arguments)};var V;P=function fa(){V||Z();V||(P=fa)};
function Z(){function b(){if(!V&&(V=!0,a.calledRun=!0,!B)){U(L);k(a);if(a.onRuntimeInitialized)a.onRuntimeInitialized();if(a.postRun)for("function"==typeof a.postRun&&(a.postRun=[a.postRun]);a.postRun.length;){var c=a.postRun.shift();M.unshift(c)}U(M)}}if(!(0<N)){if(a.preRun)for("function"==typeof a.preRun&&(a.preRun=[a.preRun]);a.preRun.length;)ba();U(K);0<N||(a.setStatus?(a.setStatus("Running..."),setTimeout(function(){setTimeout(function(){a.setStatus("")},1);b()},1)):b())}}
if(a.preInit)for("function"==typeof a.preInit&&(a.preInit=[a.preInit]);0<a.preInit.length;)a.preInit.pop()();Z();
return createVADModule.ready
}
);
})();
export default createVADModule;

BIN
public/vad/ten_vad.wasm Normal file

Binary file not shown.

View File

@@ -8,6 +8,9 @@ Please see LICENSE in the repository root for full details.
declare const sampleRate: number; declare const sampleRate: number;
declare class AudioWorkletProcessor { declare class AudioWorkletProcessor {
public readonly port: MessagePort; public readonly port: MessagePort;
public constructor(options?: {
processorOptions?: Record<string, unknown>;
});
public process( public process(
inputs: Float32Array[][], inputs: Float32Array[][],
outputs: Float32Array[][], outputs: Float32Array[][],
@@ -29,6 +32,10 @@ interface NoiseGateParams {
transientEnabled: boolean; transientEnabled: boolean;
transientThresholdDb: number; // dB above background RMS that triggers suppression transientThresholdDb: number; // dB above background RMS that triggers suppression
transientReleaseMs: number; // how quickly suppression fades after transient ends transientReleaseMs: number; // how quickly suppression fades after transient ends
// TEN-VAD params
vadEnabled: boolean;
vadPositiveThreshold: number; // open gate when prob >= this (0-1)
vadNegativeThreshold: number; // close gate when prob < this (0-1)
} }
interface VADGateMessage { interface VADGateMessage {
@@ -41,16 +48,142 @@ function dbToLinear(db: number): number {
} }
/** /**
* AudioWorkletProcessor implementing a noise gate and an optional transient * Thin synchronous wrapper around the TEN-VAD Emscripten WASM module.
* suppressor, both running per-sample in a single pass. * Instantiated synchronously in the AudioWorklet constructor from a
* pre-compiled WebAssembly.Module passed via processorOptions.
*/
class TenVADRuntime {
  // Exported linear memory of the WASM instance (minified export `g`).
  private readonly mem: WebAssembly.Memory;
  // Minified exports kept for the lifetime of the runtime: j = free,
  // l = ten_vad_process, m = ten_vad_destroy.
  private readonly freeFn: (ptr: number) => void;
  private readonly processFn: (
    handle: number,
    audioPtr: number,
    hopSize: number,
    probPtr: number,
    flagPtr: number,
  ) => number;
  private readonly destroyFn: (handle: number) => number;
  // Opaque VAD handle written by ten_vad_create.
  private readonly handle: number;
  // Persistent WASM-heap allocations reused for every hop.
  private readonly audioBufPtr: number; // hopSize Int16 samples
  private readonly probPtr: number; // float output: speech probability
  private readonly flagPtr: number; // int output: speech flag (not read here)
  // Views over the WASM heap. They are re-created only when the heap grows
  // (growth detaches the old ArrayBuffer), so the steady-state process() path
  // allocates nothing on the audio thread.
  private heapI16: Int16Array;
  private heapF32: Float32Array;
  // Set by destroy(); guards against double free and use-after-free.
  private destroyed = false;
  public readonly hopSize: number;

  /**
   * Synchronously instantiates the module and creates one VAD handle.
   *
   * @param module - Pre-compiled ten_vad.wasm module.
   * @param hopSize - Samples per process() call; must be a positive integer.
   * @param threshold - Threshold forwarded unchanged to ten_vad_create.
   * @throws RangeError if hopSize is not a positive integer.
   * @throws Error if a WASM allocation fails or ten_vad_create returns non-zero.
   */
  public constructor(
    module: WebAssembly.Module,
    hopSize: number,
    threshold: number,
  ) {
    if (!Number.isInteger(hopSize) || hopSize <= 0) {
      throw new RangeError(
        `ten_vad hopSize must be a positive integer, got ${hopSize}`,
      );
    }
    this.hopSize = hopSize;

    // Late-bound memory reference: the imports that touch memory are only
    // called after instantiation, so closing over this holder is safe.
    const state = { mem: null as WebAssembly.Memory | null };

    // The import keys are minified. The stubs mirror what the upstream
    // Emscripten glue (public/vad/ten_vad.js) supplies for the same keys, so
    // the C runtime observes the same return values as in the official loader.
    const imports = {
      a: {
        // abort
        a: (): never => {
          throw new Error("ten_vad abort");
        },
        // Upstream glue returns the constant 70 here
        // (presumably the no-filesystem fd_seek stub; TODO confirm).
        b: (): number => 70,
        // emscripten_resize_heap: grow memory to at least reqBytes.
        c: (reqBytes: number): number => {
          const mem = state.mem;
          if (!mem) return 0;
          // The size arrives as an i32; reinterpret it as unsigned like upstream.
          const wanted = reqBytes >>> 0;
          try {
            const cur = mem.buffer.byteLength;
            if (cur >= wanted) return 1;
            mem.grow(Math.ceil((wanted - cur) / 65536));
            return 1;
          } catch {
            return 0;
          }
        },
        // fd_write-style stub: the text is discarded, but the total byte count
        // is stored through pnum exactly as upstream does. Returning success
        // without storing a count would leave the caller reading an
        // uninitialised "bytes written" value.
        d: (_fd: number, iov: number, iovcnt: number, pnum: number): number => {
          const mem = state.mem;
          if (!mem) return 0;
          const u32 = new Uint32Array(mem.buffer);
          let total = 0;
          for (let i = 0; i < iovcnt; i++) {
            // Each iovec entry is { ptr: u32, len: u32 }; only len is needed.
            total += u32[(iov + i * 8 + 4) >>> 2];
          }
          u32[pnum >>> 2] = total;
          return 0;
        },
        // Upstream glue returns the constant 52 here
        // (presumably the no-filesystem fd_close stub; TODO confirm).
        e: (): number => 52,
        // memory copy within the WASM heap
        f: (dest: number, src: number, len: number): void => {
          if (state.mem) {
            new Uint8Array(state.mem.buffer).copyWithin(dest, src, src + len);
          }
        },
      },
    };

    // Synchronous instantiation of the pre-compiled module.
    // eslint-disable-next-line @typescript-eslint/no-explicit-any
    const instance = new WebAssembly.Instance(module, imports as any);
    const asm = instance.exports as {
      g: WebAssembly.Memory; // exported memory
      h: () => void; // __wasm_call_ctors
      i: (n: number) => number; // malloc
      j: (p: number) => void; // free
      k: (handlePtr: number, hopSize: number, threshold: number) => number; // ten_vad_create
      l: (handle: number, audioPtr: number, hopSize: number, probPtr: number, flagPtr: number) => number; // ten_vad_process
      m: (handle: number) => number; // ten_vad_destroy
    };
    state.mem = asm.g;
    this.mem = asm.g;
    this.freeFn = asm.j;
    this.processFn = asm.l;
    this.destroyFn = asm.m;

    // Run Emscripten static constructors before any other export is used.
    asm.h();

    // Allocate persistent buffers plus a temporary cell that receives the handle.
    this.audioBufPtr = asm.i(hopSize * 2); // Int16 samples
    this.probPtr = asm.i(4); // float
    this.flagPtr = asm.i(4); // int
    const handlePtrPtr = asm.i(4);
    if (
      this.audioBufPtr === 0 ||
      this.probPtr === 0 ||
      this.flagPtr === 0 ||
      handlePtrPtr === 0
    ) {
      // malloc reports failure with a null pointer; writing through it would
      // corrupt the start of the heap.
      throw new Error("ten_vad: WASM malloc failed");
    }

    // ten_vad_create(void** handle, int hopSize, float threshold)
    const ret = asm.k(handlePtrPtr, hopSize, threshold);
    // Read the handle as unsigned so pointers at or above 2 GiB stay valid indices.
    const handle = new Uint32Array(this.mem.buffer)[handlePtrPtr >>> 2];
    // Release the scratch cell on both the success and the failure path.
    asm.j(handlePtrPtr);
    if (ret !== 0) throw new Error(`ten_vad_create failed: ${ret}`);
    this.handle = handle;

    this.heapI16 = new Int16Array(this.mem.buffer);
    this.heapF32 = new Float32Array(this.mem.buffer);
  }

  /** Re-create the cached heap views if the WASM memory has grown. */
  private syncHeapViews(): void {
    const buffer = this.mem.buffer;
    if (this.heapI16.buffer !== buffer) {
      this.heapI16 = new Int16Array(buffer);
      this.heapF32 = new Float32Array(buffer);
    }
  }

  /**
   * Process one hop of Int16 audio.
   *
   * @param samples - Exactly hopSize samples.
   * @returns Speech probability in the range 0-1, or NaN when
   *   ten_vad_process returns a non-zero code. NaN fails every threshold
   *   comparison, so a caller applying hysteresis keeps its current state.
   * @throws RangeError if samples.length differs from hopSize (a longer array
   *   would overrun the WASM-side buffer).
   * @throws Error if called after destroy().
   */
  public process(samples: Int16Array): number {
    if (this.destroyed) {
      throw new Error("TenVADRuntime.process() called after destroy()");
    }
    if (samples.length !== this.hopSize) {
      throw new RangeError(
        `ten_vad expects ${this.hopSize} samples per hop, got ${samples.length}`,
      );
    }
    this.syncHeapViews();
    this.heapI16.set(samples, this.audioBufPtr >>> 1);
    const ret = this.processFn(
      this.handle,
      this.audioBufPtr,
      this.hopSize,
      this.probPtr,
      this.flagPtr,
    );
    // The call itself may have grown the heap; refresh before reading the result.
    this.syncHeapViews();
    if (ret !== 0) return Number.NaN;
    return this.heapF32[this.probPtr >>> 2];
  }

  /** Destroy the VAD handle and free the persistent buffers. Safe to call twice. */
  public destroy(): void {
    if (this.destroyed) return;
    this.destroyed = true;
    this.destroyFn(this.handle);
    this.freeFn(this.audioBufPtr);
    this.freeFn(this.probPtr);
    this.freeFn(this.flagPtr);
  }
}
/**
* AudioWorkletProcessor implementing a noise gate, an optional transient
* suppressor, and an optional in-worklet TEN-VAD gate — all running
* per-sample in a single pass.
* *
* Noise gate: opens when instantaneous peak exceeds threshold, closes below. * Noise gate: opens when instantaneous peak exceeds threshold, closes below.
* Attack, hold, and release times smooth the attenuation envelope. * Attack, hold, and release times smooth the attenuation envelope.
* *
* Transient suppressor: tracks a slow-moving RMS background level. When the * Transient suppressor: tracks a slow-moving RMS background level. When the
* instantaneous peak exceeds the background by more than transientThresholdDb, * instantaneous peak exceeds the background by more than transientThresholdDb,
* gain is instantly cut to 0 and releases over transientReleaseMs. This catches * gain is instantly cut to 0 and releases over transientReleaseMs.
* desk hits, mic bumps, and other sudden loud impacts without affecting speech. *
* TEN-VAD gate: accumulates audio with 3:1 decimation (48 kHz → 16 kHz),
* runs the TEN-VAD model synchronously every 256 samples (16 ms), and
* controls vadGateOpen with hysteresis. No IPC round-trip required.
*/ */
class NoiseGateProcessor extends AudioWorkletProcessor { class NoiseGateProcessor extends AudioWorkletProcessor {
// Noise gate state // Noise gate state
@@ -65,35 +198,84 @@ class NoiseGateProcessor extends AudioWorkletProcessor {
// Transient suppressor state // Transient suppressor state
private transientEnabled = false; private transientEnabled = false;
private transientRatio = dbToLinear(15); // peak must exceed rms by this factor private transientRatio = dbToLinear(15);
private transientReleaseRate = 1.0 / (0.08 * sampleRate); private transientReleaseRate = 1.0 / (0.08 * sampleRate);
private transientAttenuation = 1.0; // 1 = fully open, ramps to 0 on transient private transientAttenuation = 1.0;
private slowRms = 0; private slowRms = 0;
// Exponential smoothing coefficient for background RMS (~200ms time constant)
private rmsCoeff = Math.exp(-1.0 / (0.2 * sampleRate)); private rmsCoeff = Math.exp(-1.0 / (0.2 * sampleRate));
// VAD gate state (controlled externally via port message) // VAD gate state
private vadGateOpen = true; // starts open until VAD sends its first decision private vadGateOpen = true; // starts open; TEN-VAD closes it on first silent frame
// Smooth ramp so the VAD gate fades rather than cutting instantly (~20ms)
private vadAttenuation = 1.0; private vadAttenuation = 1.0;
private readonly vadRampRate = 1.0 / (0.02 * sampleRate); private readonly vadRampRate = 1.0 / (0.02 * sampleRate);
// TEN-VAD state
private vadEnabled = false;
private vadPositiveThreshold = 0.5;
private vadNegativeThreshold = 0.3;
private tenVadRuntime: TenVADRuntime | null = null;
// 3:1 decimation from AudioContext sample rate to 16 kHz
private readonly decRatio = Math.max(1, Math.round(sampleRate / 16000));
private decPhase = 0;
private decAcc = 0;
private readonly vadHopBuf = new Int16Array(256);
private vadHopCount = 0;
private logCounter = 0; private logCounter = 0;
public constructor() { public constructor(options?: {
super(); processorOptions?: Record<string, unknown>;
this.port.onmessage = (e: MessageEvent<NoiseGateParams | VADGateMessage>): void => { }) {
super(options);
// Try to instantiate TEN-VAD from the pre-compiled module passed by the main thread
const tenVadModule = options?.processorOptions?.tenVadModule as
| WebAssembly.Module
| undefined;
if (tenVadModule) {
try {
// hopSize = 256 samples @ 16 kHz = 16 ms; threshold = 0.5 (overridden via params)
this.tenVadRuntime = new TenVADRuntime(tenVadModule, 256, 0.5);
this.port.postMessage({
type: "log",
msg: "[NoiseGate worklet] TEN-VAD runtime initialized, decRatio=" + this.decRatio,
});
} catch (e) {
this.port.postMessage({
type: "log",
msg: "[NoiseGate worklet] TEN-VAD init failed: " + String(e),
});
}
}
this.port.onmessage = (
e: MessageEvent<NoiseGateParams | VADGateMessage>,
): void => {
if ((e.data as VADGateMessage).type === "vad-gate") { if ((e.data as VADGateMessage).type === "vad-gate") {
this.vadGateOpen = (e.data as VADGateMessage).open; this.vadGateOpen = (e.data as VADGateMessage).open;
} else { } else {
this.updateParams(e.data as NoiseGateParams); this.updateParams(e.data as NoiseGateParams);
} }
}; };
this.updateParams({ this.updateParams({
noiseGateActive: true, threshold: -60, attackMs: 25, holdMs: 200, releaseMs: 150, noiseGateActive: true,
transientEnabled: false, transientThresholdDb: 15, transientReleaseMs: 80, threshold: -60,
attackMs: 25,
holdMs: 200,
releaseMs: 150,
transientEnabled: false,
transientThresholdDb: 15,
transientReleaseMs: 80,
vadEnabled: false,
vadPositiveThreshold: 0.5,
vadNegativeThreshold: 0.3,
});
this.port.postMessage({
type: "log",
msg: "[NoiseGate worklet] constructor called, sampleRate=" + sampleRate,
}); });
this.port.postMessage({ type: "log", msg: "[NoiseGate worklet] constructor called, sampleRate=" + sampleRate });
} }
private updateParams(p: NoiseGateParams): void { private updateParams(p: NoiseGateParams): void {
@@ -105,11 +287,17 @@ class NoiseGateProcessor extends AudioWorkletProcessor {
this.transientEnabled = p.transientEnabled; this.transientEnabled = p.transientEnabled;
this.transientRatio = dbToLinear(p.transientThresholdDb); this.transientRatio = dbToLinear(p.transientThresholdDb);
this.transientReleaseRate = 1.0 / ((p.transientReleaseMs / 1000) * sampleRate); this.transientReleaseRate = 1.0 / ((p.transientReleaseMs / 1000) * sampleRate);
this.vadEnabled = p.vadEnabled ?? false;
this.vadPositiveThreshold = p.vadPositiveThreshold ?? 0.5;
this.vadNegativeThreshold = p.vadNegativeThreshold ?? 0.3;
// When VAD is disabled, open the gate immediately
if (!this.vadEnabled) this.vadGateOpen = true;
this.port.postMessage({ this.port.postMessage({
type: "log", type: "log",
msg: "[NoiseGate worklet] params updated: threshold=" + p.threshold msg: "[NoiseGate worklet] params updated: threshold=" + p.threshold
+ " transientEnabled=" + p.transientEnabled + " vadEnabled=" + p.vadEnabled
+ " transientThresholdDb=" + p.transientThresholdDb, + " vadPos=" + p.vadPositiveThreshold
+ " vadNeg=" + p.vadNegativeThreshold,
}); });
} }
@@ -132,19 +320,18 @@ class NoiseGateProcessor extends AudioWorkletProcessor {
// --- Transient suppressor --- // --- Transient suppressor ---
let transientGain = 1.0; let transientGain = 1.0;
if (this.transientEnabled) { if (this.transientEnabled) {
// Update slow RMS background (exponential moving average of energy)
this.slowRms = Math.sqrt( this.slowRms = Math.sqrt(
this.rmsCoeff * this.slowRms * this.slowRms + this.rmsCoeff * this.slowRms * this.slowRms +
(1.0 - this.rmsCoeff) * curLevel * curLevel, (1.0 - this.rmsCoeff) * curLevel * curLevel,
); );
const background = Math.max(this.slowRms, 1e-6); const background = Math.max(this.slowRms, 1e-6);
if (curLevel > background * this.transientRatio) { if (curLevel > background * this.transientRatio) {
// Transient detected — instantly cut gain
this.transientAttenuation = 0.0; this.transientAttenuation = 0.0;
} else { } else {
// Release: ramp back toward 1 this.transientAttenuation = Math.min(
this.transientAttenuation = Math.min(1.0, this.transientAttenuation + this.transientReleaseRate); 1.0,
this.transientAttenuation + this.transientReleaseRate,
);
} }
transientGain = this.transientAttenuation; transientGain = this.transientAttenuation;
} }
@@ -159,23 +346,66 @@ class NoiseGateProcessor extends AudioWorkletProcessor {
this.isOpen = false; this.isOpen = false;
} }
if (this.isOpen) { if (this.isOpen) {
this.gateAttenuation = Math.min(1.0, this.gateAttenuation + this.attackRate); this.gateAttenuation = Math.min(
1.0,
this.gateAttenuation + this.attackRate,
);
} else { } else {
this.heldTime += samplePeriod; this.heldTime += samplePeriod;
if (this.heldTime > this.holdTime) { if (this.heldTime > this.holdTime) {
this.gateAttenuation = Math.max(0.0, this.gateAttenuation - this.releaseRate); this.gateAttenuation = Math.max(
0.0,
this.gateAttenuation - this.releaseRate,
);
} }
} }
} else { } else {
this.gateAttenuation = 1.0; this.gateAttenuation = 1.0;
} }
// Ramp VAD attenuation toward target to avoid clicks on gate open/close // --- TEN-VAD in-worklet processing ---
// Accumulate raw mono samples with decRatio:1 decimation (48 kHz → 16 kHz).
// Every 256 output samples (16 ms) run the WASM VAD and update vadGateOpen.
if (this.vadEnabled && this.tenVadRuntime !== null) {
this.decAcc += input[0]?.[i] ?? 0;
this.decPhase++;
if (this.decPhase >= this.decRatio) {
this.decPhase = 0;
const avg = this.decAcc / this.decRatio;
this.decAcc = 0;
// Float32 [-1,1] → Int16 with clamping
const s16 =
avg >= 1.0
? 32767
: avg <= -1.0
? -32768
: (avg * 32767 + 0.5) | 0;
this.vadHopBuf[this.vadHopCount++] = s16;
if (this.vadHopCount >= 256) {
this.vadHopCount = 0;
const prob = this.tenVadRuntime.process(this.vadHopBuf);
if (!this.vadGateOpen && prob >= this.vadPositiveThreshold) {
this.vadGateOpen = true;
} else if (this.vadGateOpen && prob < this.vadNegativeThreshold) {
this.vadGateOpen = false;
}
}
}
}
// Ramp VAD attenuation toward target to avoid clicks
const vadTarget = this.vadGateOpen ? 1.0 : 0.0; const vadTarget = this.vadGateOpen ? 1.0 : 0.0;
if (this.vadAttenuation < vadTarget) { if (this.vadAttenuation < vadTarget) {
this.vadAttenuation = Math.min(vadTarget, this.vadAttenuation + this.vadRampRate); this.vadAttenuation = Math.min(
vadTarget,
this.vadAttenuation + this.vadRampRate,
);
} else if (this.vadAttenuation > vadTarget) { } else if (this.vadAttenuation > vadTarget) {
this.vadAttenuation = Math.max(vadTarget, this.vadAttenuation - this.vadRampRate); this.vadAttenuation = Math.max(
vadTarget,
this.vadAttenuation - this.vadRampRate,
);
} }
const gain = this.gateAttenuation * transientGain * this.vadAttenuation; const gain = this.gateAttenuation * transientGain * this.vadAttenuation;
@@ -196,7 +426,8 @@ class NoiseGateProcessor extends AudioWorkletProcessor {
msg: "[NoiseGate worklet] gateOpen=" + this.isOpen msg: "[NoiseGate worklet] gateOpen=" + this.isOpen
+ " gateAtten=" + this.gateAttenuation.toFixed(3) + " gateAtten=" + this.gateAttenuation.toFixed(3)
+ " transientAtten=" + this.transientAttenuation.toFixed(3) + " transientAtten=" + this.transientAttenuation.toFixed(3)
+ " slowRms=" + this.slowRms.toFixed(5), + " vadOpen=" + this.vadGateOpen
+ " vadAtten=" + this.vadAttenuation.toFixed(3),
}); });
} }

View File

@@ -19,6 +19,10 @@ export interface NoiseGateParams {
transientEnabled: boolean; transientEnabled: boolean;
transientThresholdDb: number; // dB above background RMS that triggers suppression transientThresholdDb: number; // dB above background RMS that triggers suppression
transientReleaseMs: number; // ms for suppression to fade after transient ends transientReleaseMs: number; // ms for suppression to fade after transient ends
// TEN-VAD params — processed entirely inside the AudioWorklet
vadEnabled: boolean;
vadPositiveThreshold: number; // open gate when isSpeech prob >= this (0-1)
vadNegativeThreshold: number; // close gate when isSpeech prob < this (0-1)
} }
/** /**
@@ -43,13 +47,36 @@ export interface AudioTrackProcessor {
destroy(): Promise<void>; destroy(): Promise<void>;
} }
// Module-level memo of the in-flight or finished TEN-VAD compilation, so the
// WASM binary is fetched and compiled once and shared by later callers.
let tenVadModulePromise: Promise<WebAssembly.Module> | null = null;

/**
 * Returns the shared promise for the compiled TEN-VAD module, starting the
 * fetch + compile on first use. A failed attempt empties the memo before the
 * rejection propagates, so the next call starts a fresh attempt.
 */
function getTenVADModule(): Promise<WebAssembly.Module> {
  if (tenVadModulePromise !== null) return tenVadModulePromise;

  // Turn a non-2xx response into a rejection, otherwise read the body.
  const readBody = (response: Response): Promise<ArrayBuffer> => {
    if (response.ok) return response.arrayBuffer();
    throw new Error(`Failed to fetch ten_vad.wasm: ${response.status}`);
  };

  // Drop the memo so a retry is possible, then pass the failure on unchanged.
  const forgetAndRethrow = (reason: unknown): never => {
    tenVadModulePromise = null;
    throw reason;
  };

  const pending = fetch("/vad/ten_vad.wasm")
    .then(readBody)
    .then((bytes) => WebAssembly.compile(bytes))
    .catch(forgetAndRethrow);
  tenVadModulePromise = pending;
  return pending;
}
/** /**
* LiveKit audio track processor that applies the OBS-style noise gate via * LiveKit audio track processor that applies a noise gate, optional transient
* AudioWorklet. * suppressor, and optional TEN-VAD gate via AudioWorklet.
* *
* Builds the audio graph: sourceNode → workletNode → destinationNode, then * The TEN-VAD WASM module is fetched once, compiled, and passed to the worklet
* exposes destinationNode's track as processedTrack for LiveKit to swap into * via processorOptions so it runs synchronously inside the audio thread —
* the WebRTC sender via sender.replaceTrack(processedTrack). * no IPC round-trip, ~16 ms VAD latency.
*
* Audio graph: sourceNode → workletNode → destinationNode
* processedTrack is destinationNode.stream.getAudioTracks()[0]
*/ */
export class NoiseGateTransformer implements AudioTrackProcessor { export class NoiseGateTransformer implements AudioTrackProcessor {
public readonly name = "noise-gate"; public readonly name = "noise-gate";
@@ -69,6 +96,15 @@ export class NoiseGateTransformer implements AudioTrackProcessor {
log.info("init() called, audioContext state:", audioContext.state, "params:", this.params); log.info("init() called, audioContext state:", audioContext.state, "params:", this.params);
// Fetch and compile the TEN-VAD WASM module (cached after first call)
let tenVadModule: WebAssembly.Module | undefined;
try {
tenVadModule = await getTenVADModule();
log.info("TEN-VAD WASM module compiled");
} catch (e) {
log.warn("TEN-VAD WASM module unavailable — VAD disabled:", e);
}
const workletUrl = new URL( const workletUrl = new URL(
"./NoiseGateProcessor.worklet.ts", "./NoiseGateProcessor.worklet.ts",
import.meta.url, import.meta.url,
@@ -80,8 +116,15 @@ export class NoiseGateTransformer implements AudioTrackProcessor {
this.workletNode = new AudioWorkletNode( this.workletNode = new AudioWorkletNode(
audioContext, audioContext,
"noise-gate-processor", "noise-gate-processor",
{
processorOptions: {
tenVadModule,
},
},
); );
this.workletNode.port.onmessage = (e: MessageEvent<{ type: string; msg: string }>): void => { this.workletNode.port.onmessage = (
e: MessageEvent<{ type: string; msg: string }>,
): void => {
if (e.data?.type === "log") log.debug(e.data.msg); if (e.data?.type === "log") log.debug(e.data.msg);
}; };
this.sendParams(); this.sendParams();
@@ -114,17 +157,12 @@ export class NoiseGateTransformer implements AudioTrackProcessor {
this.processedTrack = undefined; this.processedTrack = undefined;
} }
/** Push updated gate parameters to the running worklet. */ /** Push updated gate/VAD parameters to the running worklet. */
public updateParams(params: NoiseGateParams): void { public updateParams(params: NoiseGateParams): void {
this.params = { ...params }; this.params = { ...params };
this.sendParams(); this.sendParams();
} }
/** Tell the worklet to open or close the VAD-controlled gate. */
public setVADOpen(open: boolean): void {
this.workletNode?.port.postMessage({ type: "vad-gate", open });
}
private sendParams(): void { private sendParams(): void {
if (!this.workletNode) return; if (!this.workletNode) return;
log.debug("sendParams:", this.params); log.debug("sendParams:", this.params);

View File

@@ -1,128 +0,0 @@
/*
Copyright 2026 New Vector Ltd.
SPDX-License-Identifier: AGPL-3.0-only OR LicenseRef-Element-Commercial
Please see LICENSE in the repository root for full details.
*/
import { MicVAD, getDefaultRealTimeVADOptions } from "@ricky0123/vad-web";
// ort is not re-exported from the vad-web index; import from the submodule
import { ort } from "@ricky0123/vad-web/dist/real-time-vad.js";
import { logger } from "matrix-js-sdk/lib/logger";
// Child logger whose name tags every message from this module.
const log = logger.getChild("[SileroVADGate]");
// URL prefix handed to MicVAD as both baseAssetPath and onnxWASMBasePath in init().
const VAD_BASE_PATH = "/vad/";
/**
 * Hysteresis thresholds compared against the per-frame isSpeech probability.
 * The gate opens at positiveThreshold and closes below negativeThreshold.
 */
export interface SileroVADGateOptions {
positiveThreshold: number; // open gate when isSpeech >= this (0-1)
negativeThreshold: number; // close gate when isSpeech < this (0-1)
}
/**
* Wraps @ricky0123/vad-web's MicVAD with a two-phase lifecycle:
*
* init(audioContext) — loads the ONNX model and ORT WASM (expensive,
* call as early as possible for zero-latency enable)
* start(stream) — wires the stream and begins per-frame processing
* stop() — pauses processing, keeps model loaded
* destroy() — full teardown
*
* Uses onFrameProcessed (fires every ~32ms with v5 model) with hysteresis
* to control the gate. Starts OPEN so audio flows immediately; the model
* closes it on the first silent frame.
*/
export class SileroVADGate {
public onOpen: () => void = () => {};
public onClose: () => void = () => {};
private vad: MicVAD | null = null;
private activeStream: MediaStream | null = null;
private options: SileroVADGateOptions;
private gateOpen = true;
public constructor(options: SileroVADGateOptions) {
this.options = options;
}
/**
* Phase 1 — load the model. Call this as early as possible (e.g. when the
* AudioContext is first created) so start() is near-instant later.
*/
public async init(audioContext: AudioContext): Promise<void> {
// Avoid requiring SharedArrayBuffer (COOP/COEP headers) by running
// single-threaded. Performance is sufficient for 16kHz speech frames.
ort.env.wasm.numThreads = 1;
log.info("pre-warming MicVAD model");
this.vad = await MicVAD.new({
...getDefaultRealTimeVADOptions("v5"),
audioContext,
baseAssetPath: VAD_BASE_PATH,
onnxWASMBasePath: VAD_BASE_PATH,
startOnLoad: false,
// Stream is provided via activeStream at start() time
// eslint-disable-next-line @typescript-eslint/require-await
getStream: async (): Promise<MediaStream> => {
if (!this.activeStream) throw new Error("[VAD] stream not set — call start() first");
return this.activeStream;
},
// eslint-disable-next-line @typescript-eslint/require-await
pauseStream: async (): Promise<void> => {},
// eslint-disable-next-line @typescript-eslint/require-await
resumeStream: async (): Promise<MediaStream> => {
if (!this.activeStream) throw new Error("[VAD] stream not set");
return this.activeStream;
},
onFrameProcessed: (probabilities: { isSpeech: number; notSpeech: number }): void => {
const p = probabilities.isSpeech;
if (!this.gateOpen && p >= this.options.positiveThreshold) {
this.gateOpen = true;
log.debug("gate open (isSpeech=", p, ")");
this.onOpen();
} else if (this.gateOpen && p < this.options.negativeThreshold) {
this.gateOpen = false;
log.debug("gate close (isSpeech=", p, ")");
this.onClose();
}
},
onSpeechStart: (): void => {},
onSpeechEnd: (): void => {},
onVADMisfire: (): void => {},
onSpeechRealStart: (): void => {},
});
log.info("MicVAD model loaded");
}
/**
* Phase 2 — wire the raw mic stream and begin classifying frames.
* init() must have completed first.
*/
public async start(stream: MediaStream): Promise<void> {
if (!this.vad) throw new Error("[VAD] call init() before start()");
this.activeStream = stream;
this.gateOpen = true; // start open — first silent frame will close it
await this.vad.start();
log.info("MicVAD started");
}
/** Pause frame processing without destroying the model. */
public async stop(): Promise<void> {
if (this.vad) await this.vad.pause();
this.activeStream = null;
}
public updateOptions(options: SileroVADGateOptions): void {
this.options = options;
}
/**
 * Tears the gate down: destroys the VAD instance if one exists, then clears
 * the references to it and to the active stream.
 *
 * If destroying the VAD rejects, the rejection propagates and neither
 * reference is cleared.
 */
public async destroy(): Promise<void> {
  const vad = this.vad;
  if (vad) {
    await vad.destroy();
    this.vad = null;
  }
  this.activeStream = null;
}
}

View File

@@ -336,7 +336,7 @@ export const SettingsModal: FC<Props> = ({
id="vadEnabled" id="vadEnabled"
type="checkbox" type="checkbox"
label="Enable voice activity detection" label="Enable voice activity detection"
description="Uses the Silero VAD model to mute audio when no speech is detected." description="Uses TEN-VAD to mute audio when no speech is detected (~16 ms latency)."
checked={vadActive} checked={vadActive}
onChange={(e: ChangeEvent<HTMLInputElement>): void => onChange={(e: ChangeEvent<HTMLInputElement>): void =>
setVadActive(e.target.checked) setVadActive(e.target.checked)

View File

@@ -49,7 +49,6 @@ import {
type NoiseGateParams, type NoiseGateParams,
NoiseGateTransformer, NoiseGateTransformer,
} from "../../../livekit/NoiseGateTransformer.ts"; } from "../../../livekit/NoiseGateTransformer.ts";
import { SileroVADGate } from "../../../livekit/SileroVADGate.ts";
import { observeTrackReference$ } from "../../observeTrackReference"; import { observeTrackReference$ } from "../../observeTrackReference";
import { type Connection } from "../remoteMembers/Connection.ts"; import { type Connection } from "../remoteMembers/Connection.ts";
import { ObservableScope } from "../../ObservableScope.ts"; import { ObservableScope } from "../../ObservableScope.ts";
@@ -439,12 +438,6 @@ export class Publisher {
let transformer: NoiseGateTransformer | null = null; let transformer: NoiseGateTransformer | null = null;
let audioCtx: AudioContext | null = null; let audioCtx: AudioContext | null = null;
// Single VAD gate instance — persists across start/stop to keep model warm
let vadGate: SileroVADGate | null = new SileroVADGate({
positiveThreshold: vadPositiveThreshold.getValue(),
negativeThreshold: vadNegativeThreshold.getValue(),
});
let rawMicTrack: MediaStreamTrack | null = null;
const currentParams = (): NoiseGateParams => ({ const currentParams = (): NoiseGateParams => ({
noiseGateActive: noiseGateEnabled.getValue(), noiseGateActive: noiseGateEnabled.getValue(),
@@ -455,98 +448,55 @@ export class Publisher {
transientEnabled: transientSuppressorEnabled.getValue(), transientEnabled: transientSuppressorEnabled.getValue(),
transientThresholdDb: transientThreshold.getValue(), transientThresholdDb: transientThreshold.getValue(),
transientReleaseMs: transientRelease.getValue(), transientReleaseMs: transientRelease.getValue(),
vadEnabled: vadEnabled.getValue(),
vadPositiveThreshold: vadPositiveThreshold.getValue(),
vadNegativeThreshold: vadNegativeThreshold.getValue(),
}); });
const stopVAD = (): void => { // Attach / detach processor when any processing feature changes or the track changes.
if (vadGate) { combineLatest([audioTrack$, noiseGateEnabled.value$, vadEnabled.value$, transientSuppressorEnabled.value$])
void vadGate.stop();
}
// Always reopen gate when VAD stops so audio flows without VAD
transformer?.setVADOpen(true);
};
const startVAD = (rawTrack: MediaStreamTrack): void => {
if (!vadGate) return;
const stream = new MediaStream([rawTrack]);
vadGate.onOpen = (): void => transformer?.setVADOpen(true);
vadGate.onClose = (): void => transformer?.setVADOpen(false);
vadGate.start(stream).catch((e: unknown) => {
this.logger.error("[VAD] failed to start", e);
});
};
// Attach / detach processor when noise gate or VAD enabled state or the track changes.
combineLatest([audioTrack$, noiseGateEnabled.value$, vadEnabled.value$])
.pipe(scope.bind()) .pipe(scope.bind())
.subscribe(([audioTrack, ngEnabled, vadActive]) => { .subscribe(([audioTrack, ngEnabled, vadActive, transientActive]) => {
if (!audioTrack) return; if (!audioTrack) return;
const shouldAttach = ngEnabled || vadActive; const shouldAttach = ngEnabled || vadActive || transientActive;
if (shouldAttach && !audioTrack.getProcessor()) { if (shouldAttach && !audioTrack.getProcessor()) {
const params = currentParams(); const params = currentParams();
this.logger.info("[NoiseGate] attaching processor, params:", params); this.logger.info("[NoiseGate] attaching processor, params:", params);
// Capture the raw mic track BEFORE setProcessor replaces it
// eslint-disable-next-line @typescript-eslint/no-explicit-any
rawMicTrack = (audioTrack as any).mediaStreamTrack ?? null;
transformer = new NoiseGateTransformer(params); transformer = new NoiseGateTransformer(params);
audioCtx = new AudioContext(); audioCtx = new AudioContext();
this.logger.info("[NoiseGate] AudioContext state before resume:", audioCtx.state); this.logger.info("[NoiseGate] AudioContext state before resume:", audioCtx.state);
// eslint-disable-next-line @typescript-eslint/no-explicit-any // eslint-disable-next-line @typescript-eslint/no-explicit-any
(audioTrack as any).setAudioContext(audioCtx); (audioTrack as any).setAudioContext(audioCtx);
// Pre-warm VAD model as soon as AudioContext is created
if (vadGate && audioCtx) {
vadGate.init(audioCtx).catch((e: unknown) => {
this.logger.error("[VAD] failed to pre-warm model", e);
});
}
audioCtx.resume().then(async () => { audioCtx.resume().then(async () => {
this.logger.info("[NoiseGate] AudioContext state after resume:", audioCtx?.state); this.logger.info("[NoiseGate] AudioContext state after resume:", audioCtx?.state);
return audioTrack // eslint-disable-next-line @typescript-eslint/no-explicit-any
// eslint-disable-next-line @typescript-eslint/no-explicit-any return audioTrack.setProcessor(transformer as any);
.setProcessor(transformer as any);
}).then(() => { }).then(() => {
this.logger.info("[NoiseGate] setProcessor resolved"); this.logger.info("[NoiseGate] setProcessor resolved");
if (vadActive && rawMicTrack) startVAD(rawMicTrack);
}).catch((e: unknown) => { }).catch((e: unknown) => {
this.logger.error("[NoiseGate] setProcessor failed", e); this.logger.error("[NoiseGate] setProcessor failed", e);
}); });
} else if (!shouldAttach && audioTrack.getProcessor()) { } else if (!shouldAttach && audioTrack.getProcessor()) {
this.logger.info("[NoiseGate] removing processor"); this.logger.info("[NoiseGate] removing processor");
stopVAD();
void audioTrack.stopProcessor(); void audioTrack.stopProcessor();
void audioCtx?.close(); void audioCtx?.close();
audioCtx = null; audioCtx = null;
transformer = null; transformer = null;
rawMicTrack = null;
// eslint-disable-next-line @typescript-eslint/no-explicit-any // eslint-disable-next-line @typescript-eslint/no-explicit-any
(audioTrack as any).setAudioContext(undefined); (audioTrack as any).setAudioContext(undefined);
} else if (shouldAttach && audioTrack.getProcessor()) { } else if (shouldAttach && audioTrack.getProcessor()) {
// Processor already attached — push updated params (e.g. noiseGateActive toggled) // Processor already attached — push updated params (e.g. noiseGateActive toggled)
transformer?.updateParams(currentParams()); transformer?.updateParams(currentParams());
} else { } else {
this.logger.info("[NoiseGate] tick — ngEnabled:", ngEnabled, "vadActive:", vadActive, "hasProcessor:", !!audioTrack.getProcessor()); this.logger.info(
"[NoiseGate] tick — ngEnabled:", ngEnabled,
"vadActive:", vadActive,
"hasProcessor:", !!audioTrack.getProcessor(),
);
} }
}); });
// Start/stop VAD when its toggle changes. // Push all param changes (noise gate + VAD) to the live worklet.
combineLatest([audioTrack$, vadEnabled.value$])
.pipe(scope.bind())
.subscribe(([, enabled]) => {
if (!rawMicTrack) return;
if (enabled) {
startVAD(rawMicTrack);
} else {
stopVAD();
}
});
// Push VAD threshold changes to the live gate without recreating it.
combineLatest([vadPositiveThreshold.value$, vadNegativeThreshold.value$])
.pipe(scope.bind())
.subscribe(([positiveThreshold, negativeThreshold]) => {
vadGate?.updateOptions({ positiveThreshold, negativeThreshold });
});
// Push param changes to the live worklet without recreating the processor.
combineLatest([ combineLatest([
noiseGateEnabled.value$, noiseGateEnabled.value$,
noiseGateThreshold.value$, noiseGateThreshold.value$,
@@ -556,23 +506,24 @@ export class Publisher {
transientSuppressorEnabled.value$, transientSuppressorEnabled.value$,
transientThreshold.value$, transientThreshold.value$,
transientRelease.value$, transientRelease.value$,
vadEnabled.value$,
vadPositiveThreshold.value$,
vadNegativeThreshold.value$,
]) ])
.pipe(scope.bind()) .pipe(scope.bind())
.subscribe(([noiseGateActive, threshold, attackMs, holdMs, releaseMs, .subscribe(([
transientEnabled, transientThresholdDb, transientReleaseMs]) => { noiseGateActive, threshold, attackMs, holdMs, releaseMs,
transientEnabled, transientThresholdDb, transientReleaseMs,
vadActive, vadPos, vadNeg,
]) => {
transformer?.updateParams({ transformer?.updateParams({
noiseGateActive, threshold, attackMs, holdMs, releaseMs, noiseGateActive, threshold, attackMs, holdMs, releaseMs,
transientEnabled, transientThresholdDb, transientReleaseMs, transientEnabled, transientThresholdDb, transientReleaseMs,
vadEnabled: vadActive,
vadPositiveThreshold: vadPos,
vadNegativeThreshold: vadNeg,
}); });
}); });
// Destroy VAD gate when scope ends (processor fully torn down)
scope.onEnd(() => {
if (vadGate) {
void vadGate.destroy();
vadGate = null;
}
});
} }
private observeTrackProcessors( private observeTrackProcessors(

View File

@@ -7,14 +7,12 @@ Please see LICENSE in the repository root for full details.
import { import {
loadEnv, loadEnv,
PluginOption,
searchForWorkspaceRoot, searchForWorkspaceRoot,
type ConfigEnv, type ConfigEnv,
type UserConfig, type UserConfig,
} from "vite"; } from "vite";
import svgrPlugin from "vite-plugin-svgr"; import svgrPlugin from "vite-plugin-svgr";
import { createHtmlPlugin } from "vite-plugin-html"; import { createHtmlPlugin } from "vite-plugin-html";
import { viteStaticCopy } from "vite-plugin-static-copy";
import { codecovVitePlugin } from "@codecov/vite-plugin"; import { codecovVitePlugin } from "@codecov/vite-plugin";
import { sentryVitePlugin } from "@sentry/vite-plugin"; import { sentryVitePlugin } from "@sentry/vite-plugin";
@@ -22,7 +20,6 @@ import { sentryVitePlugin } from "@sentry/vite-plugin";
import react from "@vitejs/plugin-react"; import react from "@vitejs/plugin-react";
import { realpathSync } from "fs"; import { realpathSync } from "fs";
import * as fs from "node:fs"; import * as fs from "node:fs";
import * as path from "node:path";
// https://vitejs.dev/config/ // https://vitejs.dev/config/
// Modified type helper from defineConfig to allow for packageType (see defineConfig from vite) // Modified type helper from defineConfig to allow for packageType (see defineConfig from vite)
@@ -36,59 +33,8 @@ export default ({
// In future we might be able to do what is needed via code splitting at // In future we might be able to do what is needed via code splitting at
// build time. // build time.
process.env.VITE_PACKAGE = packageType ?? "full"; process.env.VITE_PACKAGE = packageType ?? "full";
// Serve VAD assets (ONNX model, WASM, worklet bundle) from node_modules
// during dev. vite-plugin-static-copy only runs during build.
const serveVadAssets: PluginOption = {
name: "serve-vad-assets",
configureServer(server) {
const mimeTypes: Record<string, string> = {
".wasm": "application/wasm",
".onnx": "application/octet-stream",
".js": "application/javascript",
".mjs": "application/javascript",
};
const sourceDirs = [
path.join(process.cwd(), "node_modules/@ricky0123/vad-web/dist"),
path.join(process.cwd(), "node_modules/onnxruntime-web/dist"),
];
server.middlewares.use("/vad", (req, res, next) => {
const filename = (req.url ?? "/").replace(/^\//, "").split("?")[0];
for (const dir of sourceDirs) {
const filePath = path.join(dir, filename);
if (fs.existsSync(filePath)) {
const mime = mimeTypes[path.extname(filePath)] ?? "application/octet-stream";
res.setHeader("Content-Type", mime);
fs.createReadStream(filePath).pipe(res);
return;
}
}
next();
});
},
};
const plugins: PluginOption[] = [ const plugins = [
serveVadAssets,
viteStaticCopy({
targets: [
{
src: "node_modules/@ricky0123/vad-web/dist/vad.worklet.bundle.min.js",
dest: "vad",
},
{
src: "node_modules/@ricky0123/vad-web/dist/silero_vad_legacy.onnx",
dest: "vad",
},
{
src: "node_modules/@ricky0123/vad-web/dist/silero_vad_v5.onnx",
dest: "vad",
},
{
src: "node_modules/onnxruntime-web/dist/*.wasm",
dest: "vad",
},
],
}),
react(), react(),
svgrPlugin({ svgrPlugin({
svgrOptions: { svgrOptions: {

186
yarn.lock
View File

@@ -3841,79 +3841,6 @@ __metadata:
languageName: node languageName: node
linkType: hard linkType: hard
"@protobufjs/aspromise@npm:^1.1.1, @protobufjs/aspromise@npm:^1.1.2":
version: 1.1.2
resolution: "@protobufjs/aspromise@npm:1.1.2"
checksum: 10c0/a83343a468ff5b5ec6bff36fd788a64c839e48a07ff9f4f813564f58caf44d011cd6504ed2147bf34835bd7a7dd2107052af755961c6b098fd8902b4f6500d0f
languageName: node
linkType: hard
"@protobufjs/base64@npm:^1.1.2":
version: 1.1.2
resolution: "@protobufjs/base64@npm:1.1.2"
checksum: 10c0/eec925e681081af190b8ee231f9bad3101e189abbc182ff279da6b531e7dbd2a56f1f306f37a80b1be9e00aa2d271690d08dcc5f326f71c9eed8546675c8caf6
languageName: node
linkType: hard
"@protobufjs/codegen@npm:^2.0.4":
version: 2.0.4
resolution: "@protobufjs/codegen@npm:2.0.4"
checksum: 10c0/26ae337c5659e41f091606d16465bbcc1df1f37cc1ed462438b1f67be0c1e28dfb2ca9f294f39100c52161aef82edf758c95d6d75650a1ddf31f7ddee1440b43
languageName: node
linkType: hard
"@protobufjs/eventemitter@npm:^1.1.0":
version: 1.1.0
resolution: "@protobufjs/eventemitter@npm:1.1.0"
checksum: 10c0/1eb0a75180e5206d1033e4138212a8c7089a3d418c6dfa5a6ce42e593a4ae2e5892c4ef7421f38092badba4040ea6a45f0928869989411001d8c1018ea9a6e70
languageName: node
linkType: hard
"@protobufjs/fetch@npm:^1.1.0":
version: 1.1.0
resolution: "@protobufjs/fetch@npm:1.1.0"
dependencies:
"@protobufjs/aspromise": "npm:^1.1.1"
"@protobufjs/inquire": "npm:^1.1.0"
checksum: 10c0/cda6a3dc2d50a182c5865b160f72077aac197046600091dbb005dd0a66db9cce3c5eaed6d470ac8ed49d7bcbeef6ee5f0bc288db5ff9a70cbd003e5909065233
languageName: node
linkType: hard
"@protobufjs/float@npm:^1.0.2":
version: 1.0.2
resolution: "@protobufjs/float@npm:1.0.2"
checksum: 10c0/18f2bdede76ffcf0170708af15c9c9db6259b771e6b84c51b06df34a9c339dbbeec267d14ce0bddd20acc142b1d980d983d31434398df7f98eb0c94a0eb79069
languageName: node
linkType: hard
"@protobufjs/inquire@npm:^1.1.0":
version: 1.1.0
resolution: "@protobufjs/inquire@npm:1.1.0"
checksum: 10c0/64372482efcba1fb4d166a2664a6395fa978b557803857c9c03500e0ac1013eb4b1aacc9ed851dd5fc22f81583670b4f4431bae186f3373fedcfde863ef5921a
languageName: node
linkType: hard
"@protobufjs/path@npm:^1.1.2":
version: 1.1.2
resolution: "@protobufjs/path@npm:1.1.2"
checksum: 10c0/cece0a938e7f5dfd2fa03f8c14f2f1cf8b0d6e13ac7326ff4c96ea311effd5fb7ae0bba754fbf505312af2e38500250c90e68506b97c02360a43793d88a0d8b4
languageName: node
linkType: hard
"@protobufjs/pool@npm:^1.1.0":
version: 1.1.0
resolution: "@protobufjs/pool@npm:1.1.0"
checksum: 10c0/eda2718b7f222ac6e6ad36f758a92ef90d26526026a19f4f17f668f45e0306a5bd734def3f48f51f8134ae0978b6262a5c517c08b115a551756d1a3aadfcf038
languageName: node
linkType: hard
"@protobufjs/utf8@npm:^1.1.0":
version: 1.1.0
resolution: "@protobufjs/utf8@npm:1.1.0"
checksum: 10c0/a3fe31fe3fa29aa3349e2e04ee13dc170cc6af7c23d92ad49e3eeaf79b9766264544d3da824dba93b7855bd6a2982fb40032ef40693da98a136d835752beb487
languageName: node
linkType: hard
"@radix-ui/number@npm:1.1.1": "@radix-ui/number@npm:1.1.1":
version: 1.1.1 version: 1.1.1
resolution: "@radix-ui/number@npm:1.1.1" resolution: "@radix-ui/number@npm:1.1.1"
@@ -5095,15 +5022,6 @@ __metadata:
languageName: node languageName: node
linkType: hard linkType: hard
"@ricky0123/vad-web@npm:^0.0.30":
version: 0.0.30
resolution: "@ricky0123/vad-web@npm:0.0.30"
dependencies:
onnxruntime-web: "npm:^1.17.0"
checksum: 10c0/c91e69fb65879d54eb3eeab3cdadca95b9d1fffe3fbecf8c7ab1d59f418b79ad6e1cd0e67df25db26cf286e8fa86e030ce95ee51a749727e344f7c588d689770
languageName: node
linkType: hard
"@rolldown/pluginutils@npm:1.0.0-beta.27": "@rolldown/pluginutils@npm:1.0.0-beta.27":
version: 1.0.0-beta.27 version: 1.0.0-beta.27
resolution: "@rolldown/pluginutils@npm:1.0.0-beta.27" resolution: "@rolldown/pluginutils@npm:1.0.0-beta.27"
@@ -5922,15 +5840,6 @@ __metadata:
languageName: node languageName: node
linkType: hard linkType: hard
"@types/node@npm:>=13.7.0":
version: 25.5.0
resolution: "@types/node@npm:25.5.0"
dependencies:
undici-types: "npm:~7.18.0"
checksum: 10c0/70c508165b6758c4f88d4f91abca526c3985eee1985503d4c2bd994dbaf588e52ac57e571160f18f117d76e963570ac82bd20e743c18987e82564312b3b62119
languageName: node
linkType: hard
"@types/node@npm:^24.0.0": "@types/node@npm:^24.0.0":
version: 24.10.13 version: 24.10.13
resolution: "@types/node@npm:24.10.13" resolution: "@types/node@npm:24.10.13"
@@ -7478,7 +7387,7 @@ __metadata:
languageName: node languageName: node
linkType: hard linkType: hard
"chokidar@npm:^3.5.3, chokidar@npm:^3.6.0": "chokidar@npm:^3.5.3":
version: 3.6.0 version: 3.6.0
resolution: "chokidar@npm:3.6.0" resolution: "chokidar@npm:3.6.0"
dependencies: dependencies:
@@ -8394,7 +8303,6 @@ __metadata:
"@radix-ui/react-slider": "npm:^1.1.2" "@radix-ui/react-slider": "npm:^1.1.2"
"@radix-ui/react-visually-hidden": "npm:^1.0.3" "@radix-ui/react-visually-hidden": "npm:^1.0.3"
"@react-spring/web": "npm:^10.0.0" "@react-spring/web": "npm:^10.0.0"
"@ricky0123/vad-web": "npm:^0.0.30"
"@sentry/react": "npm:^8.0.0" "@sentry/react": "npm:^8.0.0"
"@sentry/vite-plugin": "npm:^3.0.0" "@sentry/vite-plugin": "npm:^3.0.0"
"@stylistic/eslint-plugin": "npm:^3.0.0" "@stylistic/eslint-plugin": "npm:^3.0.0"
@@ -8450,7 +8358,6 @@ __metadata:
node-stdlib-browser: "npm:^1.3.1" node-stdlib-browser: "npm:^1.3.1"
normalize.css: "npm:^8.0.1" normalize.css: "npm:^8.0.1"
observable-hooks: "npm:^4.2.3" observable-hooks: "npm:^4.2.3"
onnxruntime-web: "npm:1.18.0"
pako: "npm:^2.0.4" pako: "npm:^2.0.4"
postcss: "npm:^8.4.41" postcss: "npm:^8.4.41"
postcss-preset-env: "npm:^10.0.0" postcss-preset-env: "npm:^10.0.0"
@@ -8473,7 +8380,6 @@ __metadata:
vite-plugin-generate-file: "npm:^0.3.0" vite-plugin-generate-file: "npm:^0.3.0"
vite-plugin-html: "npm:^3.2.2" vite-plugin-html: "npm:^3.2.2"
vite-plugin-node-stdlib-browser: "npm:^0.2.1" vite-plugin-node-stdlib-browser: "npm:^0.2.1"
vite-plugin-static-copy: "npm:^4.0.0"
vite-plugin-svgr: "npm:^4.0.0" vite-plugin-svgr: "npm:^4.0.0"
vitest: "npm:^4.0.18" vitest: "npm:^4.0.18"
vitest-axe: "npm:^1.0.0-pre.3" vitest-axe: "npm:^1.0.0-pre.3"
@@ -9636,13 +9542,6 @@ __metadata:
languageName: node languageName: node
linkType: hard linkType: hard
"flatbuffers@npm:^1.12.0":
version: 1.12.0
resolution: "flatbuffers@npm:1.12.0"
checksum: 10c0/14f9fb55f3063b389fc4e3d67ee8ffc71e351b478369fdae919701e458fc7766f2fea4f51181d6e73b4930fbc018cb7013558930367811b960cff01f23aae618
languageName: node
linkType: hard
"flatted@npm:^3.2.9": "flatted@npm:^3.2.9":
version: 3.3.1 version: 3.3.1
resolution: "flatted@npm:3.3.1" resolution: "flatted@npm:3.3.1"
@@ -10059,13 +9958,6 @@ __metadata:
languageName: node languageName: node
linkType: hard linkType: hard
"guid-typescript@npm:^1.0.9":
version: 1.0.9
resolution: "guid-typescript@npm:1.0.9"
checksum: 10c0/fa0a2b2b4e06e0976a81c947b74e114b92f6647e84b52e24ab0981d2fdbaa1a640641d8fd269004dd7c581baebeb4f9d9782b74391e717e47c9b822bea4b3be6
languageName: node
linkType: hard
"gulp-sort@npm:^2.0.0": "gulp-sort@npm:^2.0.0":
version: 2.0.0 version: 2.0.0
resolution: "gulp-sort@npm:2.0.0" resolution: "gulp-sort@npm:2.0.0"
@@ -11336,13 +11228,6 @@ __metadata:
languageName: node languageName: node
linkType: hard linkType: hard
"long@npm:^5.0.0, long@npm:^5.2.3":
version: 5.3.2
resolution: "long@npm:5.3.2"
checksum: 10c0/7130fe1cbce2dca06734b35b70d380ca3f70271c7f8852c922a7c62c86c4e35f0c39290565eca7133c625908d40e126ac57c02b1b1a4636b9457d77e1e60b981
languageName: node
linkType: hard
"loose-envify@npm:^1.4.0": "loose-envify@npm:^1.4.0":
version: 1.4.0 version: 1.4.0
resolution: "loose-envify@npm:1.4.0" resolution: "loose-envify@npm:1.4.0"
@@ -12047,27 +11932,6 @@ __metadata:
languageName: node languageName: node
linkType: hard linkType: hard
"onnxruntime-common@npm:1.18.0":
version: 1.18.0
resolution: "onnxruntime-common@npm:1.18.0"
checksum: 10c0/dc5caab4c5b95b4255ba3dcd5ea3e04e10a3a8c605d60c53dac25facfbe9d59a60c510b8ffc3e5f1c8816fded479b6f946b7840a0d10cbd32914ef8ce81c7218
languageName: node
linkType: hard
"onnxruntime-web@npm:1.18.0":
version: 1.18.0
resolution: "onnxruntime-web@npm:1.18.0"
dependencies:
flatbuffers: "npm:^1.12.0"
guid-typescript: "npm:^1.0.9"
long: "npm:^5.2.3"
onnxruntime-common: "npm:1.18.0"
platform: "npm:^1.3.6"
protobufjs: "npm:^7.2.4"
checksum: 10c0/90d9eed3e8d8bcfea61858eee2a1b94c680c67f6265bb5924752ae1bd9db93cb2dd8bc562ffba6b048fc32c721fcc30fd0b0669da5607fc6c41eaecaba3d4bd7
languageName: node
linkType: hard
"optionator@npm:^0.9.3": "optionator@npm:^0.9.3":
version: 0.9.4 version: 0.9.4
resolution: "optionator@npm:0.9.4" resolution: "optionator@npm:0.9.4"
@@ -12212,13 +12076,6 @@ __metadata:
languageName: node languageName: node
linkType: hard linkType: hard
"p-map@npm:^7.0.4":
version: 7.0.4
resolution: "p-map@npm:7.0.4"
checksum: 10c0/a5030935d3cb2919d7e89454d1ce82141e6f9955413658b8c9403cfe379283770ed3048146b44cde168aa9e8c716505f196d5689db0ae3ce9a71521a2fef3abd
languageName: node
linkType: hard
"p-retry@npm:7": "p-retry@npm:7":
version: 7.0.0 version: 7.0.0
resolution: "p-retry@npm:7.0.0" resolution: "p-retry@npm:7.0.0"
@@ -12471,13 +12328,6 @@ __metadata:
languageName: node languageName: node
linkType: hard linkType: hard
"platform@npm:^1.3.6":
version: 1.3.6
resolution: "platform@npm:1.3.6"
checksum: 10c0/69f2eb692e15f1a343dd0d9347babd9ca933824c8673096be746ff66f99f2bdc909fadd8609076132e6ec768349080babb7362299f2a7f885b98f1254ae6224b
languageName: node
linkType: hard
"playwright-core@npm:1.58.2": "playwright-core@npm:1.58.2":
version: 1.58.2 version: 1.58.2
resolution: "playwright-core@npm:1.58.2" resolution: "playwright-core@npm:1.58.2"
@@ -13033,26 +12883,6 @@ __metadata:
languageName: node languageName: node
linkType: hard linkType: hard
"protobufjs@npm:^7.2.4":
version: 7.5.4
resolution: "protobufjs@npm:7.5.4"
dependencies:
"@protobufjs/aspromise": "npm:^1.1.2"
"@protobufjs/base64": "npm:^1.1.2"
"@protobufjs/codegen": "npm:^2.0.4"
"@protobufjs/eventemitter": "npm:^1.1.0"
"@protobufjs/fetch": "npm:^1.1.0"
"@protobufjs/float": "npm:^1.0.2"
"@protobufjs/inquire": "npm:^1.1.0"
"@protobufjs/path": "npm:^1.1.2"
"@protobufjs/pool": "npm:^1.1.0"
"@protobufjs/utf8": "npm:^1.1.0"
"@types/node": "npm:>=13.7.0"
long: "npm:^5.0.0"
checksum: 10c0/913b676109ffb3c05d3d31e03a684e569be91f3bba8613da4a683d69d9dba948daa2afd7d2e7944d1aa6c417890c35d9d9a8883c1160affafb0f9670d59ef722
languageName: node
linkType: hard
"proxy-from-env@npm:^1.1.0": "proxy-from-env@npm:^1.1.0":
version: 1.1.0 version: 1.1.0
resolution: "proxy-from-env@npm:1.1.0" resolution: "proxy-from-env@npm:1.1.0"
@@ -15507,20 +15337,6 @@ __metadata:
languageName: node languageName: node
linkType: hard linkType: hard
"vite-plugin-static-copy@npm:^4.0.0":
version: 4.0.0
resolution: "vite-plugin-static-copy@npm:4.0.0"
dependencies:
chokidar: "npm:^3.6.0"
p-map: "npm:^7.0.4"
picocolors: "npm:^1.1.1"
tinyglobby: "npm:^0.2.15"
peerDependencies:
vite: ^6.0.0 || ^7.0.0 || ^8.0.0
checksum: 10c0/05b42b6f942e81b838f828c596fc6d00f72c0b5e1c4d68df13aaa4405c96d71333a082cc1bba96d0153134191addff8b2fe9134ed2e725b3b808e3b8470497c8
languageName: node
linkType: hard
"vite-plugin-svgr@npm:^4.0.0": "vite-plugin-svgr@npm:^4.0.0":
version: 4.5.0 version: 4.5.0
resolution: "vite-plugin-svgr@npm:4.5.0" resolution: "vite-plugin-svgr@npm:4.5.0"