feat: add VAD mode setting — standard vs aggressive latency

Standard: 5 ms open / 20 ms close ramp (comfortable feel)
Aggressive: 1 ms open / 5 ms close ramp (lowest possible latency)

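The mode is surfaced as a radio selector under Settings → Audio → Voice
activity detection and is shown only while VAD is enabled. The selection
is passed to the worklet as NoiseGateParams.vadAggressive via updateParams.
The VAD checkbox description now reads ~10 ms latency instead of ~16 ms.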

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
mk
2026-03-24 07:48:41 -03:00
parent 025735c490
commit e95e613c08
5 changed files with 45 additions and 5 deletions

View File

@@ -36,6 +36,7 @@ interface NoiseGateParams {
vadEnabled: boolean; vadEnabled: boolean;
vadPositiveThreshold: number; // open gate when prob >= this (0–1) vadPositiveThreshold: number; // open gate when prob >= this (0–1)
vadNegativeThreshold: number; // close gate when prob < this (0–1) vadNegativeThreshold: number; // close gate when prob < this (0–1)
vadAggressive: boolean; // true: 1 ms open / 5 ms close; false: 5 ms / 20 ms
} }
interface VADGateMessage { interface VADGateMessage {
@@ -209,9 +210,9 @@ class NoiseGateProcessor extends AudioWorkletProcessor {
// VAD gate state // VAD gate state
private vadGateOpen = true; // starts open; TEN-VAD closes it on first silent frame private vadGateOpen = true; // starts open; TEN-VAD closes it on first silent frame
private vadAttenuation = 1.0; private vadAttenuation = 1.0;
// Asymmetric ramp: fast open to avoid masking speech onset, slow close to de-click // Asymmetric ramp rates — recomputed in updateParams based on vadAggressive
private readonly vadOpenRampRate = 1.0 / (0.005 * sampleRate); // 5 ms private vadOpenRampRate = 1.0 / (0.005 * sampleRate); // default: 5 ms
private readonly vadCloseRampRate = 1.0 / (0.02 * sampleRate); // 20 ms private vadCloseRampRate = 1.0 / (0.02 * sampleRate); // default: 20 ms
// TEN-VAD state // TEN-VAD state
private vadEnabled = false; private vadEnabled = false;
@@ -275,6 +276,7 @@ class NoiseGateProcessor extends AudioWorkletProcessor {
vadEnabled: false, vadEnabled: false,
vadPositiveThreshold: 0.5, vadPositiveThreshold: 0.5,
vadNegativeThreshold: 0.3, vadNegativeThreshold: 0.3,
vadAggressive: false,
}); });
this.port.postMessage({ this.port.postMessage({
@@ -295,6 +297,13 @@ class NoiseGateProcessor extends AudioWorkletProcessor {
this.vadEnabled = p.vadEnabled ?? false; this.vadEnabled = p.vadEnabled ?? false;
this.vadPositiveThreshold = p.vadPositiveThreshold ?? 0.5; this.vadPositiveThreshold = p.vadPositiveThreshold ?? 0.5;
this.vadNegativeThreshold = p.vadNegativeThreshold ?? 0.3; this.vadNegativeThreshold = p.vadNegativeThreshold ?? 0.3;
if (p.vadAggressive) {
this.vadOpenRampRate = 1.0 / (0.001 * sampleRate); // 1 ms — near-instant
this.vadCloseRampRate = 1.0 / (0.005 * sampleRate); // 5 ms
} else {
this.vadOpenRampRate = 1.0 / (0.005 * sampleRate); // 5 ms
this.vadCloseRampRate = 1.0 / (0.02 * sampleRate); // 20 ms
}
// When VAD is disabled, open the gate immediately // When VAD is disabled, open the gate immediately
if (!this.vadEnabled) this.vadGateOpen = true; if (!this.vadEnabled) this.vadGateOpen = true;
this.port.postMessage({ this.port.postMessage({

View File

@@ -23,6 +23,7 @@ export interface NoiseGateParams {
vadEnabled: boolean; vadEnabled: boolean;
vadPositiveThreshold: number; // open gate when isSpeech prob >= this (0–1) vadPositiveThreshold: number; // open gate when isSpeech prob >= this (0–1)
vadNegativeThreshold: number; // close gate when isSpeech prob < this (0–1) vadNegativeThreshold: number; // close gate when isSpeech prob < this (0–1)
vadAggressive: boolean; // true: 1 ms open / 5 ms close ramp; false: 5 ms / 20 ms
} }
/** /**

View File

@@ -35,6 +35,7 @@ import {
vadEnabled as vadEnabledSetting, vadEnabled as vadEnabledSetting,
vadPositiveThreshold as vadPositiveThresholdSetting, vadPositiveThreshold as vadPositiveThresholdSetting,
vadNegativeThreshold as vadNegativeThresholdSetting, vadNegativeThreshold as vadNegativeThresholdSetting,
vadMode as vadModeSetting,
} from "./settings"; } from "./settings";
import { PreferencesSettingsTab } from "./PreferencesSettingsTab"; import { PreferencesSettingsTab } from "./PreferencesSettingsTab";
import { Slider } from "../Slider"; import { Slider } from "../Slider";
@@ -134,6 +135,7 @@ export const SettingsModal: FC<Props> = ({
// Voice activity detection // Voice activity detection
const [vadActive, setVadActive] = useSetting(vadEnabledSetting); const [vadActive, setVadActive] = useSetting(vadEnabledSetting);
const [vadModeValue, setVadModeValue] = useSetting(vadModeSetting);
const [vadPositiveThreshold, setVadPositiveThreshold] = useSetting(vadPositiveThresholdSetting); const [vadPositiveThreshold, setVadPositiveThreshold] = useSetting(vadPositiveThresholdSetting);
const [vadPositiveThresholdRaw, setVadPositiveThresholdRaw] = useState(vadPositiveThreshold); const [vadPositiveThresholdRaw, setVadPositiveThresholdRaw] = useState(vadPositiveThreshold);
const [vadNegativeThreshold, setVadNegativeThreshold] = useSetting(vadNegativeThresholdSetting); const [vadNegativeThreshold, setVadNegativeThreshold] = useSetting(vadNegativeThresholdSetting);
@@ -336,7 +338,7 @@ export const SettingsModal: FC<Props> = ({
id="vadEnabled" id="vadEnabled"
type="checkbox" type="checkbox"
label="Enable voice activity detection" label="Enable voice activity detection"
description="Uses TEN-VAD to mute audio when no speech is detected (~16 ms latency)." description="Uses TEN-VAD to mute audio when no speech is detected (~10 ms latency)."
checked={vadActive} checked={vadActive}
onChange={(e: ChangeEvent<HTMLInputElement>): void => onChange={(e: ChangeEvent<HTMLInputElement>): void =>
setVadActive(e.target.checked) setVadActive(e.target.checked)
@@ -345,6 +347,28 @@ export const SettingsModal: FC<Props> = ({
</FieldRow> </FieldRow>
{vadActive && ( {vadActive && (
<> <>
<FieldRow>
<InputField
id="vadModeStandard"
type="radio"
name="vadMode"
label="Standard"
description="5 ms open / 20 ms close ramp — comfortable feel."
checked={vadModeValue === "standard"}
onChange={(): void => setVadModeValue("standard")}
/>
</FieldRow>
<FieldRow>
<InputField
id="vadModeAggressive"
type="radio"
name="vadMode"
label="Aggressive"
description="1 ms open / 5 ms close ramp — lowest possible latency."
checked={vadModeValue === "aggressive"}
onChange={(): void => setVadModeValue("aggressive")}
/>
</FieldRow>
<div className={`${styles.volumeSlider} ${styles.thresholdSlider}`}> <div className={`${styles.volumeSlider} ${styles.thresholdSlider}`}>
<span className={styles.sliderLabel}>Open threshold: {Math.round(vadPositiveThresholdRaw * 100)}%</span> <span className={styles.sliderLabel}>Open threshold: {Math.round(vadPositiveThresholdRaw * 100)}%</span>
<p>How confident the model must be before opening the gate.</p> <p>How confident the model must be before opening the gate.</p>

View File

@@ -150,6 +150,8 @@ export const vadEnabled = new Setting<boolean>("vad-enabled", false);
export const vadPositiveThreshold = new Setting<number>("vad-positive-threshold", 0.2); export const vadPositiveThreshold = new Setting<number>("vad-positive-threshold", 0.2);
// Probability below which the VAD closes the gate (0–1) // Probability below which the VAD closes the gate (0–1)
export const vadNegativeThreshold = new Setting<number>("vad-negative-threshold", 0.1); export const vadNegativeThreshold = new Setting<number>("vad-negative-threshold", 0.1);
// standard: 5 ms open / 20 ms close ramp; aggressive: 1 ms open / 5 ms close ramp
export const vadMode = new Setting<"standard" | "aggressive">("vad-mode", "standard");
export const transientSuppressorEnabled = new Setting<boolean>( export const transientSuppressorEnabled = new Setting<boolean>(
"transient-suppressor-enabled", "transient-suppressor-enabled",

View File

@@ -44,6 +44,7 @@ import {
vadEnabled, vadEnabled,
vadPositiveThreshold, vadPositiveThreshold,
vadNegativeThreshold, vadNegativeThreshold,
vadMode,
} from "../../../settings/settings.ts"; } from "../../../settings/settings.ts";
import { import {
type NoiseGateParams, type NoiseGateParams,
@@ -451,6 +452,7 @@ export class Publisher {
vadEnabled: vadEnabled.getValue(), vadEnabled: vadEnabled.getValue(),
vadPositiveThreshold: vadPositiveThreshold.getValue(), vadPositiveThreshold: vadPositiveThreshold.getValue(),
vadNegativeThreshold: vadNegativeThreshold.getValue(), vadNegativeThreshold: vadNegativeThreshold.getValue(),
vadAggressive: vadMode.getValue() === "aggressive",
}); });
// Attach / detach processor when any processing feature changes or the track changes. // Attach / detach processor when any processing feature changes or the track changes.
@@ -509,12 +511,13 @@ export class Publisher {
vadEnabled.value$, vadEnabled.value$,
vadPositiveThreshold.value$, vadPositiveThreshold.value$,
vadNegativeThreshold.value$, vadNegativeThreshold.value$,
vadMode.value$,
]) ])
.pipe(scope.bind()) .pipe(scope.bind())
.subscribe(([ .subscribe(([
noiseGateActive, threshold, attackMs, holdMs, releaseMs, noiseGateActive, threshold, attackMs, holdMs, releaseMs,
transientEnabled, transientThresholdDb, transientReleaseMs, transientEnabled, transientThresholdDb, transientReleaseMs,
vadActive, vadPos, vadNeg, vadActive, vadPos, vadNeg, vadModeValue,
]) => { ]) => {
transformer?.updateParams({ transformer?.updateParams({
noiseGateActive, threshold, attackMs, holdMs, releaseMs, noiseGateActive, threshold, attackMs, holdMs, releaseMs,
@@ -522,6 +525,7 @@ export class Publisher {
vadEnabled: vadActive, vadEnabled: vadActive,
vadPositiveThreshold: vadPos, vadPositiveThreshold: vadPos,
vadNegativeThreshold: vadNeg, vadNegativeThreshold: vadNeg,
vadAggressive: vadModeValue === "aggressive",
}); });
}); });
} }