fix: use Silero v5 model for 32ms frames and lower default thresholds

The legacy model is hardcoded to 1536-sample frames (96 ms at 16 kHz); v5 uses
512-sample frames (32 ms), so the gate can react three times sooner. Also lower
the default positive/negative thresholds from 0.5/0.35 to 0.2/0.1 so the gate
opens at the first sign of speech rather than waiting for high model confidence.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
mk
2026-03-24 00:02:17 -03:00
parent 859db651e0
commit aff09d0e49
3 changed files with 9 additions and 3 deletions

View File

@@ -61,7 +61,9 @@ export class SileroVADGate {
ort.env.wasm.numThreads = 1; ort.env.wasm.numThreads = 1;
this.vad = await MicVAD.new({ this.vad = await MicVAD.new({
...getDefaultRealTimeVADOptions("legacy"), // v5 model uses 512-sample frames (32ms) vs legacy's fixed 1536 (96ms),
// giving 3× faster gate response at the cost of a slightly larger model file.
...getDefaultRealTimeVADOptions("v5"),
audioContext, audioContext,
baseAssetPath: VAD_BASE_PATH, baseAssetPath: VAD_BASE_PATH,
onnxWASMBasePath: VAD_BASE_PATH, onnxWASMBasePath: VAD_BASE_PATH,

View File

@@ -147,9 +147,9 @@ export const noiseGateRelease = new Setting<number>("noise-gate-release", 150);
export const vadEnabled = new Setting<boolean>("vad-enabled", false); export const vadEnabled = new Setting<boolean>("vad-enabled", false);
// Probability above which the VAD opens the gate (0–1) // Probability above which the VAD opens the gate (0–1)
export const vadPositiveThreshold = new Setting<number>("vad-positive-threshold", 0.5); export const vadPositiveThreshold = new Setting<number>("vad-positive-threshold", 0.2);
// Probability below which the VAD closes the gate (0–1) // Probability below which the VAD closes the gate (0–1)
export const vadNegativeThreshold = new Setting<number>("vad-negative-threshold", 0.35); export const vadNegativeThreshold = new Setting<number>("vad-negative-threshold", 0.1);
export const transientSuppressorEnabled = new Setting<boolean>( export const transientSuppressorEnabled = new Setting<boolean>(
"transient-suppressor-enabled", "transient-suppressor-enabled",

View File

@@ -79,6 +79,10 @@ export default ({
src: "node_modules/@ricky0123/vad-web/dist/silero_vad_legacy.onnx", src: "node_modules/@ricky0123/vad-web/dist/silero_vad_legacy.onnx",
dest: "vad", dest: "vad",
}, },
{
src: "node_modules/@ricky0123/vad-web/dist/silero_vad_v5.onnx",
dest: "vad",
},
{ {
src: "node_modules/onnxruntime-web/dist/*.wasm", src: "node_modules/onnxruntime-web/dist/*.wasm",
dest: "vad", dest: "vad",