From aff09d0e49ff8ddde0cd451598a9d364f29e3d4d Mon Sep 17 00:00:00 2001 From: mk Date: Tue, 24 Mar 2026 00:02:17 -0300 Subject: [PATCH] fix: use Silero v5 model for 32ms frames and lower default thresholds The legacy model is hardcoded to 1536-sample frames (96ms at 16 kHz); v5 uses 512-sample frames (32ms), cutting gate-open latency to roughly a third. Also lower the default positive/negative thresholds from 0.5/0.35 to 0.2/0.1 so the gate opens at the first sign of speech rather than waiting for high model confidence. Co-Authored-By: Claude Sonnet 4.6 --- src/livekit/SileroVADGate.ts | 4 +++- src/settings/settings.ts | 4 ++-- vite.config.ts | 4 ++++ 3 files changed, 9 insertions(+), 3 deletions(-) diff --git a/src/livekit/SileroVADGate.ts b/src/livekit/SileroVADGate.ts index 101f8f1f..5a951d5e 100644 --- a/src/livekit/SileroVADGate.ts +++ b/src/livekit/SileroVADGate.ts @@ -61,7 +61,9 @@ export class SileroVADGate { ort.env.wasm.numThreads = 1; this.vad = await MicVAD.new({ - ...getDefaultRealTimeVADOptions("legacy"), + // v5 model uses 512-sample frames (32ms) vs legacy's fixed 1536 (96ms), + // giving 3× faster gate response at the cost of a slightly larger model file. 
+ ...getDefaultRealTimeVADOptions("v5"), audioContext, baseAssetPath: VAD_BASE_PATH, onnxWASMBasePath: VAD_BASE_PATH, diff --git a/src/settings/settings.ts b/src/settings/settings.ts index f097510e..c609812a 100644 --- a/src/settings/settings.ts +++ b/src/settings/settings.ts @@ -147,9 +147,9 @@ export const noiseGateRelease = new Setting("noise-gate-release", 150); export const vadEnabled = new Setting("vad-enabled", false); // Probability above which the VAD opens the gate (0–1) -export const vadPositiveThreshold = new Setting("vad-positive-threshold", 0.5); +export const vadPositiveThreshold = new Setting("vad-positive-threshold", 0.2); // Probability below which the VAD closes the gate (0–1) -export const vadNegativeThreshold = new Setting("vad-negative-threshold", 0.35); +export const vadNegativeThreshold = new Setting("vad-negative-threshold", 0.1); export const transientSuppressorEnabled = new Setting( "transient-suppressor-enabled", diff --git a/vite.config.ts b/vite.config.ts index 5b64430b..c8236884 100644 --- a/vite.config.ts +++ b/vite.config.ts @@ -79,6 +79,10 @@ export default ({ src: "node_modules/@ricky0123/vad-web/dist/silero_vad_legacy.onnx", dest: "vad", }, + { + src: "node_modules/@ricky0123/vad-web/dist/silero_vad_v5.onnx", + dest: "vad", + }, { src: "node_modules/onnxruntime-web/dist/*.wasm", dest: "vad",