fix: use Silero v5 model for 32ms frames and lower default thresholds
The legacy model is hardcoded to 1536-sample frames (96ms); v5 uses 512-sample frames (32ms), reducing gate-open latency roughly threefold. Also lower the default positive/negative thresholds to 0.2/0.1 so the gate opens at the first sign of speech rather than waiting for high model confidence.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -61,7 +61,9 @@ export class SileroVADGate {
|
||||
ort.env.wasm.numThreads = 1;
|
||||
|
||||
this.vad = await MicVAD.new({
|
||||
...getDefaultRealTimeVADOptions("legacy"),
|
||||
// v5 model uses 512-sample frames (32ms) vs legacy's fixed 1536 (96ms),
|
||||
// giving 3× faster gate response at the cost of a slightly larger model file.
|
||||
...getDefaultRealTimeVADOptions("v5"),
|
||||
audioContext,
|
||||
baseAssetPath: VAD_BASE_PATH,
|
||||
onnxWASMBasePath: VAD_BASE_PATH,
|
||||
|
||||
@@ -147,9 +147,9 @@ export const noiseGateRelease = new Setting<number>("noise-gate-release", 150);
|
||||
|
||||
export const vadEnabled = new Setting<boolean>("vad-enabled", false);
|
||||
// Probability above which the VAD opens the gate (0–1)
|
||||
export const vadPositiveThreshold = new Setting<number>("vad-positive-threshold", 0.5);
|
||||
export const vadPositiveThreshold = new Setting<number>("vad-positive-threshold", 0.2);
|
||||
// Probability below which the VAD closes the gate (0–1)
|
||||
export const vadNegativeThreshold = new Setting<number>("vad-negative-threshold", 0.35);
|
||||
export const vadNegativeThreshold = new Setting<number>("vad-negative-threshold", 0.1);
|
||||
|
||||
export const transientSuppressorEnabled = new Setting<boolean>(
|
||||
"transient-suppressor-enabled",
|
||||
|
||||
Reference in New Issue
Block a user