feat: replace Silero VAD with TEN-VAD running inside the AudioWorklet

TEN-VAD (official TEN-framework/ten-vad WASM, no npm dependency) replaces
@ricky0123/vad-web. The WASM module is compiled once on the main thread and
passed to the AudioWorklet via processorOptions, where it is instantiated
synchronously and called every 16 ms with no IPC round-trip.

- Add public/vad/ten_vad.{wasm,js} from official upstream lib/Web/
- NoiseGateProcessor: TenVADRuntime class wraps the Emscripten WASM with
  minimal import stubs; 3:1 decimation accumulates 256 Int16 samples @
  16 kHz per hop; hysteresis controls vadGateOpen directly in-worklet
- NoiseGateTransformer: fetch+compile WASM once (module-level cache),
  pass WebAssembly.Module via processorOptions; remove setVADOpen()
- Publisher: remove all SileroVADGate lifecycle (init/start/stop/destroy,
  rawMicTrack capture); VAD params folded into single combineLatest;
  fix transient suppressor standalone attach (shouldAttach now includes
  transientSuppressorEnabled)
- vite.config.ts: remove viteStaticCopy, serveVadAssets plugin, and all
  vad-web/onnxruntime copy targets (public/vad/ served automatically)
- Remove @ricky0123/vad-web, onnxruntime-web deps and resolution

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
mk
2026-03-24 07:43:52 -03:00
parent dbd4eef899
commit dc1f30b84f
10 changed files with 370 additions and 492 deletions

View File

@@ -19,6 +19,10 @@ export interface NoiseGateParams {
transientEnabled: boolean;
transientThresholdDb: number; // dB above background RMS that triggers suppression
transientReleaseMs: number; // ms for suppression to fade after transient ends
// TEN-VAD params — processed entirely inside the AudioWorklet
vadEnabled: boolean;
vadPositiveThreshold: number; // open gate when isSpeech prob >= this (0–1)
vadNegativeThreshold: number; // close gate when isSpeech prob < this (0–1)
}
/**
@@ -43,13 +47,36 @@ export interface AudioTrackProcessor {
destroy(): Promise<void>;
}
// In-flight or settled compilation of the TEN-VAD WASM binary. Shared by every
// caller so the binary is fetched and compiled at most once per successful load.
let tenVadModulePromise: Promise<WebAssembly.Module> | null = null;

/**
 * Returns the shared promise for the compiled TEN-VAD WebAssembly module.
 *
 * The first call fetches "/vad/ten_vad.wasm" and compiles it; later calls get
 * the very same promise back. If the fetch or the compile fails, the cached
 * promise is discarded before the error propagates, so a subsequent call
 * starts a fresh attempt instead of replaying the failure.
 *
 * @returns A promise resolving to the compiled module.
 * @throws (as a rejection) when the HTTP response is not OK, or when reading
 *         or compiling the binary fails.
 */
function getTenVADModule(): Promise<WebAssembly.Module> {
  if (tenVadModulePromise !== null) {
    return tenVadModulePromise;
  }

  // Drop the cached promise and re-raise, leaving the door open for a retry.
  const forgetAndRethrow = (reason: unknown): never => {
    tenVadModulePromise = null;
    throw reason;
  };

  const inFlight = fetch("/vad/ten_vad.wasm")
    .then((response) => {
      if (response.ok) {
        return response.arrayBuffer();
      }
      throw new Error(`Failed to fetch ten_vad.wasm: ${response.status}`);
    })
    .then((bytes) => WebAssembly.compile(bytes))
    .catch(forgetAndRethrow);

  tenVadModulePromise = inFlight;
  return inFlight;
}
/**
* LiveKit audio track processor that applies the OBS-style noise gate via
* AudioWorklet.
* LiveKit audio track processor that applies a noise gate, optional transient
* suppressor, and optional TEN-VAD gate via AudioWorklet.
*
* Builds the audio graph: sourceNode → workletNode → destinationNode, then
* exposes destinationNode's track as processedTrack for LiveKit to swap into
* the WebRTC sender via sender.replaceTrack(processedTrack).
* The TEN-VAD WASM module is fetched once, compiled, and passed to the worklet
* via processorOptions so it runs synchronously inside the audio thread —
* no IPC round-trip, ~16 ms VAD latency.
*
* Audio graph: sourceNode → workletNode → destinationNode
* processedTrack is destinationNode.stream.getAudioTracks()[0]
*/
export class NoiseGateTransformer implements AudioTrackProcessor {
public readonly name = "noise-gate";
@@ -69,6 +96,15 @@ export class NoiseGateTransformer implements AudioTrackProcessor {
log.info("init() called, audioContext state:", audioContext.state, "params:", this.params);
// Fetch and compile the TEN-VAD WASM module (cached after first call)
let tenVadModule: WebAssembly.Module | undefined;
try {
tenVadModule = await getTenVADModule();
log.info("TEN-VAD WASM module compiled");
} catch (e) {
log.warn("TEN-VAD WASM module unavailable — VAD disabled:", e);
}
const workletUrl = new URL(
"./NoiseGateProcessor.worklet.ts",
import.meta.url,
@@ -80,8 +116,15 @@ export class NoiseGateTransformer implements AudioTrackProcessor {
this.workletNode = new AudioWorkletNode(
audioContext,
"noise-gate-processor",
{
processorOptions: {
tenVadModule,
},
},
);
this.workletNode.port.onmessage = (e: MessageEvent<{ type: string; msg: string }>): void => {
this.workletNode.port.onmessage = (
e: MessageEvent<{ type: string; msg: string }>,
): void => {
if (e.data?.type === "log") log.debug(e.data.msg);
};
this.sendParams();
@@ -114,17 +157,12 @@ export class NoiseGateTransformer implements AudioTrackProcessor {
this.processedTrack = undefined;
}
/** Push updated gate parameters to the running worklet. */
/**
 * Replace the stored gate/VAD parameters and forward them to the worklet.
 *
 * @param params - The new parameter set; a shallow copy is stored so later
 *                 changes to the caller's object do not affect this instance.
 */
public updateParams(params: NoiseGateParams): void {
  // Keep our own shallow snapshot rather than the caller's object.
  const snapshot: NoiseGateParams = Object.assign({}, params);
  this.params = snapshot;
  this.sendParams();
}
/**
 * Ask the worklet to open or close the VAD-controlled gate.
 *
 * Does nothing when no worklet node exists.
 *
 * @param open - `true` to open the gate, `false` to close it.
 */
public setVADOpen(open: boolean): void {
  const node = this.workletNode;
  if (node == null) {
    return;
  }
  node.port.postMessage({ type: "vad-gate", open });
}
private sendParams(): void {
if (!this.workletNode) return;
log.debug("sendParams:", this.params);