feat: add VAD threshold controls and smooth gate ramp

Replace the hard 0/1 VAD gate with a 20ms ramp in the worklet to prevent
clicks on open/close transitions. Expose positive and negative speech
probability thresholds as user-adjustable settings (defaults 0.5/0.35).
Sliders with restore-defaults button added to the VAD section of the
audio settings tab.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
mk
2026-03-23 23:57:35 -03:00
parent 1ffee2d25e
commit 859db651e0
5 changed files with 101 additions and 21 deletions

View File

@@ -72,6 +72,9 @@ class NoiseGateProcessor extends AudioWorkletProcessor {
// VAD gate state (controlled externally via port message)
private vadGateOpen = true; // starts open until VAD sends its first decision
// Smooth ramp so the VAD gate fades rather than cutting instantly (~20ms)
private vadAttenuation = 1.0;
private readonly vadRampRate = 1.0 / (0.02 * sampleRate);
private logCounter = 0;
@@ -160,7 +163,15 @@ class NoiseGateProcessor extends AudioWorkletProcessor {
}
}
const gain = this.gateAttenuation * transientGain * (this.vadGateOpen ? 1.0 : 0.0);
// Ramp VAD attenuation toward target to avoid clicks on gate open/close
const vadTarget = this.vadGateOpen ? 1.0 : 0.0;
if (this.vadAttenuation < vadTarget) {
this.vadAttenuation = Math.min(vadTarget, this.vadAttenuation + this.vadRampRate);
} else if (this.vadAttenuation > vadTarget) {
this.vadAttenuation = Math.max(vadTarget, this.vadAttenuation - this.vadRampRate);
}
const gain = this.gateAttenuation * transientGain * this.vadAttenuation;
for (let c = 0; c < output.length; c++) {
const inCh = input[c] ?? input[0];

View File

@@ -14,23 +14,23 @@ const log = logger.getChild("[SileroVADGate]");
const VAD_BASE_PATH = "/vad/";
// Speech probability above this value opens the gate; below it closes it.
// vad-web's defaults are positiveSpeechThreshold=0.5, negativeSpeechThreshold=0.35.
// We use those same values so the gate tracks the model's own speech/silence logic.
const SPEECH_OPEN_THRESHOLD = 0.5;
const SPEECH_CLOSE_THRESHOLD = 0.35;
/**
 * Hysteresis thresholds applied to the per-frame speech probability:
 * the gate opens at `positiveThreshold` and closes below `negativeThreshold`.
 */
export interface SileroVADGateOptions {
positiveThreshold: number; // open gate when isSpeech >= this (0–1)
negativeThreshold: number; // close gate when isSpeech < this (0–1)
}
/**
* Wraps @ricky0123/vad-web's MicVAD to feed per-frame speech probability
* decisions into the NoiseGateTransformer's VAD gate.
*
* Uses onFrameProcessed (fires every ~96ms) rather than the segment-level
* onSpeechStart/onSpeechEnd callbacks. The segment callbacks only fire at
* speech segment boundaries — with purely non-speech noise, onSpeechEnd
* never fires and the gate stays open. Per-frame probability control fixes
* this: the gate closes on the first silent frame.
* onSpeechStart/onSpeechEnd callbacks — those only fire at segment boundaries
* so non-speech noise never triggers onSpeechEnd, keeping the gate open.
* Per-frame probability control with hysteresis fixes this.
*
* The gate starts closed (silent) and opens only once the VAD confirms speech.
* The gate starts OPEN (fail-safe): audio flows immediately and the model
* closes it on the first silent frame. A failed model load therefore
* degrades gracefully instead of permanently muting the user.
*/
export class SileroVADGate {
/** Called each time the gate transitions to open (speech detected). */
@@ -41,11 +41,13 @@ export class SileroVADGate {
private vad: MicVAD | null = null;
private readonly stream: MediaStream;
private readonly audioContext: AudioContext;
private gateOpen = false;
private options: SileroVADGateOptions;
private gateOpen = true;
public constructor(stream: MediaStream, audioContext: AudioContext) {
public constructor(stream: MediaStream, audioContext: AudioContext, options: SileroVADGateOptions) {
this.stream = stream;
this.audioContext = audioContext;
this.options = options;
}
public async start(): Promise<void> {
@@ -72,11 +74,11 @@ export class SileroVADGate {
resumeStream: async (): Promise<MediaStream> => stream,
onFrameProcessed: (probabilities: { isSpeech: number; notSpeech: number }): void => {
const p = probabilities.isSpeech;
if (!this.gateOpen && p >= SPEECH_OPEN_THRESHOLD) {
if (!this.gateOpen && p >= this.options.positiveThreshold) {
this.gateOpen = true;
log.debug("gate open (isSpeech=", p, ")");
this.onOpen();
} else if (this.gateOpen && p < SPEECH_CLOSE_THRESHOLD) {
} else if (this.gateOpen && p < this.options.negativeThreshold) {
this.gateOpen = false;
log.debug("gate close (isSpeech=", p, ")");
this.onClose();
@@ -88,15 +90,14 @@ export class SileroVADGate {
onSpeechRealStart: (): void => {},
});
// Gate starts OPEN so audio flows immediately. The first silence frame
// will close it. This also means a failed model load degrades gracefully
// (audio still flows) rather than permanently muting the user.
this.gateOpen = true;
await this.vad.start();
log.info("MicVAD started");
}
/**
 * Replaces the open/close thresholds used by the gate.
 *
 * The frame callback reads `this.options` each time a frame is processed,
 * so the new values apply from the next frame without restarting the VAD.
 *
 * NOTE(review): the thresholds are stored as given; nothing here checks
 * that negativeThreshold <= positiveThreshold — confirm callers enforce it.
 *
 * @param options - New positive (open) and negative (close) thresholds.
 */
public updateOptions(options: SileroVADGateOptions): void {
this.options = options;
}
public async destroy(): Promise<void> {
if (this.vad) {
await this.vad.destroy();

View File

@@ -33,6 +33,8 @@ import {
transientThreshold as transientThresholdSetting,
transientRelease as transientReleaseSetting,
vadEnabled as vadEnabledSetting,
vadPositiveThreshold as vadPositiveThresholdSetting,
vadNegativeThreshold as vadNegativeThresholdSetting,
} from "./settings";
import { PreferencesSettingsTab } from "./PreferencesSettingsTab";
import { Slider } from "../Slider";
@@ -132,6 +134,10 @@ export const SettingsModal: FC<Props> = ({
// Voice activity detection
const [vadActive, setVadActive] = useSetting(vadEnabledSetting);
const [vadPositiveThreshold, setVadPositiveThreshold] = useSetting(vadPositiveThresholdSetting);
const [vadPositiveThresholdRaw, setVadPositiveThresholdRaw] = useState(vadPositiveThreshold);
const [vadNegativeThreshold, setVadNegativeThreshold] = useSetting(vadNegativeThresholdSetting);
const [vadNegativeThresholdRaw, setVadNegativeThresholdRaw] = useState(vadNegativeThreshold);
// Transient suppressor settings
const [transientEnabled, setTransientEnabled] = useSetting(transientSuppressorEnabledSetting);
@@ -338,6 +344,52 @@ export const SettingsModal: FC<Props> = ({
disabled={!noiseGateEnabled}
/>
</FieldRow>
{vadActive && (
<>
<div className={`${styles.volumeSlider} ${styles.thresholdSlider}`}>
<span className={styles.sliderLabel}>Open threshold: {Math.round(vadPositiveThresholdRaw * 100)}%</span>
<p>How confident the model must be before opening the gate.</p>
<Slider
label="VAD open threshold"
value={vadPositiveThresholdRaw}
onValueChange={setVadPositiveThresholdRaw}
onValueCommit={setVadPositiveThreshold}
min={0.1}
max={0.9}
step={0.05}
tooltip={false}
/>
</div>
<div className={styles.volumeSlider}>
<span className={styles.sliderLabel}>Close threshold: {Math.round(vadNegativeThresholdRaw * 100)}%</span>
<p>How low the probability must drop before closing the gate.</p>
<Slider
label="VAD close threshold"
value={vadNegativeThresholdRaw}
onValueChange={setVadNegativeThresholdRaw}
onValueCommit={setVadNegativeThreshold}
min={0.05}
max={0.7}
step={0.05}
tooltip={false}
/>
</div>
<div className={styles.restoreDefaults}>
<Button
kind="secondary"
size="sm"
onClick={(): void => {
const pos = vadPositiveThresholdSetting.defaultValue;
const neg = vadNegativeThresholdSetting.defaultValue;
setVadPositiveThreshold(pos); setVadPositiveThresholdRaw(pos);
setVadNegativeThreshold(neg); setVadNegativeThresholdRaw(neg);
}}
>
Restore defaults
</Button>
</div>
</>
)}
</div>
<div className={styles.noiseGateSection}>
<Heading

View File

@@ -146,6 +146,10 @@ export const noiseGateHold = new Setting<number>("noise-gate-hold", 200);
export const noiseGateRelease = new Setting<number>("noise-gate-release", 150);
export const vadEnabled = new Setting<boolean>("vad-enabled", false);
// Probability at or above which the VAD opens the gate (0–1)
export const vadPositiveThreshold = new Setting<number>("vad-positive-threshold", 0.5);
// Probability below which the VAD closes the gate (0–1)
export const vadNegativeThreshold = new Setting<number>("vad-negative-threshold", 0.35);
export const transientSuppressorEnabled = new Setting<boolean>(
"transient-suppressor-enabled",

View File

@@ -42,6 +42,8 @@ import {
transientThreshold,
transientRelease,
vadEnabled,
vadPositiveThreshold,
vadNegativeThreshold,
} from "../../../settings/settings.ts";
import {
type NoiseGateParams,
@@ -467,7 +469,10 @@ export class Publisher {
return;
}
const stream = new MediaStream([rawTrack]);
vadGate = new SileroVADGate(stream, ctx);
vadGate = new SileroVADGate(stream, ctx, {
positiveThreshold: vadPositiveThreshold.getValue(),
negativeThreshold: vadNegativeThreshold.getValue(),
});
vadGate.onOpen = (): void => transformer?.setVADOpen(true);
vadGate.onClose = (): void => transformer?.setVADOpen(false);
vadGate.start().catch((e: unknown) => {
@@ -525,6 +530,13 @@ export class Publisher {
}
});
// Push VAD threshold changes to the live gate without recreating it.
combineLatest([vadPositiveThreshold.value$, vadNegativeThreshold.value$])
.pipe(scope.bind())
.subscribe(([positiveThreshold, negativeThreshold]) => {
vadGate?.updateOptions({ positiveThreshold, negativeThreshold });
});
// Push param changes to the live worklet without recreating the processor.
combineLatest([
noiseGateThreshold.value$,