feat: add VAD mode setting — standard vs aggressive latency

Standard: 5 ms open / 20 ms close ramp (comfortable feel)
Aggressive: 1 ms open / 5 ms close ramp (lowest possible latency)

The mode is surfaced as a radio selector in Settings → Audio → Voice
activity detection, visible while VAD is enabled. Wired through
NoiseGateParams.vadAggressive → worklet updateParams.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
mk
2026-03-24 07:48:41 -03:00
parent 025735c490
commit e95e613c08
5 changed files with 45 additions and 5 deletions

View File

@@ -36,6 +36,7 @@ interface NoiseGateParams {
vadEnabled: boolean;
vadPositiveThreshold: number; // open gate when prob >= this (0–1)
vadNegativeThreshold: number; // close gate when prob < this (0–1)
vadAggressive: boolean; // true: 1 ms open / 5 ms close; false: 5 ms / 20 ms
}
interface VADGateMessage {
@@ -209,9 +210,9 @@ class NoiseGateProcessor extends AudioWorkletProcessor {
// VAD gate state
private vadGateOpen = true; // starts open; TEN-VAD closes it on first silent frame
private vadAttenuation = 1.0;
// Asymmetric ramp: fast open to avoid masking speech onset, slow close to de-click
private readonly vadOpenRampRate = 1.0 / (0.005 * sampleRate); // 5 ms
private readonly vadCloseRampRate = 1.0 / (0.02 * sampleRate); // 20 ms
// Asymmetric ramp rates — recomputed in updateParams based on vadAggressive
private vadOpenRampRate = 1.0 / (0.005 * sampleRate); // default: 5 ms
private vadCloseRampRate = 1.0 / (0.02 * sampleRate); // default: 20 ms
// TEN-VAD state
private vadEnabled = false;
@@ -275,6 +276,7 @@ class NoiseGateProcessor extends AudioWorkletProcessor {
vadEnabled: false,
vadPositiveThreshold: 0.5,
vadNegativeThreshold: 0.3,
vadAggressive: false,
});
this.port.postMessage({
@@ -295,6 +297,13 @@ class NoiseGateProcessor extends AudioWorkletProcessor {
this.vadEnabled = p.vadEnabled ?? false;
this.vadPositiveThreshold = p.vadPositiveThreshold ?? 0.5;
this.vadNegativeThreshold = p.vadNegativeThreshold ?? 0.3;
if (p.vadAggressive) {
this.vadOpenRampRate = 1.0 / (0.001 * sampleRate); // 1 ms — near-instant
this.vadCloseRampRate = 1.0 / (0.005 * sampleRate); // 5 ms
} else {
this.vadOpenRampRate = 1.0 / (0.005 * sampleRate); // 5 ms
this.vadCloseRampRate = 1.0 / (0.02 * sampleRate); // 20 ms
}
// When VAD is disabled, open the gate immediately
if (!this.vadEnabled) this.vadGateOpen = true;
this.port.postMessage({

View File

@@ -23,6 +23,7 @@ export interface NoiseGateParams {
vadEnabled: boolean;
vadPositiveThreshold: number; // open gate when isSpeech prob >= this (0–1)
vadNegativeThreshold: number; // close gate when isSpeech prob < this (0–1)
vadAggressive: boolean; // true: 1 ms open / 5 ms close ramp; false: 5 ms / 20 ms
}
/**

View File

@@ -35,6 +35,7 @@ import {
vadEnabled as vadEnabledSetting,
vadPositiveThreshold as vadPositiveThresholdSetting,
vadNegativeThreshold as vadNegativeThresholdSetting,
vadMode as vadModeSetting,
} from "./settings";
import { PreferencesSettingsTab } from "./PreferencesSettingsTab";
import { Slider } from "../Slider";
@@ -134,6 +135,7 @@ export const SettingsModal: FC<Props> = ({
// Voice activity detection
const [vadActive, setVadActive] = useSetting(vadEnabledSetting);
const [vadModeValue, setVadModeValue] = useSetting(vadModeSetting);
const [vadPositiveThreshold, setVadPositiveThreshold] = useSetting(vadPositiveThresholdSetting);
const [vadPositiveThresholdRaw, setVadPositiveThresholdRaw] = useState(vadPositiveThreshold);
const [vadNegativeThreshold, setVadNegativeThreshold] = useSetting(vadNegativeThresholdSetting);
@@ -336,7 +338,7 @@ export const SettingsModal: FC<Props> = ({
id="vadEnabled"
type="checkbox"
label="Enable voice activity detection"
description="Uses TEN-VAD to mute audio when no speech is detected (~16 ms latency)."
description="Uses TEN-VAD to mute audio when no speech is detected (~10 ms latency)."
checked={vadActive}
onChange={(e: ChangeEvent<HTMLInputElement>): void =>
setVadActive(e.target.checked)
@@ -345,6 +347,28 @@ export const SettingsModal: FC<Props> = ({
</FieldRow>
{vadActive && (
<>
<FieldRow>
<InputField
id="vadModeStandard"
type="radio"
name="vadMode"
label="Standard"
description="5 ms open / 20 ms close ramp — comfortable feel."
checked={vadModeValue === "standard"}
onChange={(): void => setVadModeValue("standard")}
/>
</FieldRow>
<FieldRow>
<InputField
id="vadModeAggressive"
type="radio"
name="vadMode"
label="Aggressive"
description="1 ms open / 5 ms close ramp — lowest possible latency."
checked={vadModeValue === "aggressive"}
onChange={(): void => setVadModeValue("aggressive")}
/>
</FieldRow>
<div className={`${styles.volumeSlider} ${styles.thresholdSlider}`}>
<span className={styles.sliderLabel}>Open threshold: {Math.round(vadPositiveThresholdRaw * 100)}%</span>
<p>How confident the model must be before opening the gate.</p>

View File

@@ -150,6 +150,8 @@ export const vadEnabled = new Setting<boolean>("vad-enabled", false);
export const vadPositiveThreshold = new Setting<number>("vad-positive-threshold", 0.2);
// Probability below which the VAD closes the gate (0–1)
export const vadNegativeThreshold = new Setting<number>("vad-negative-threshold", 0.1);
// standard: 5 ms open / 20 ms close ramp; aggressive: 1 ms open / 5 ms close ramp
export const vadMode = new Setting<"standard" | "aggressive">("vad-mode", "standard");
export const transientSuppressorEnabled = new Setting<boolean>(
"transient-suppressor-enabled",

View File

@@ -44,6 +44,7 @@ import {
vadEnabled,
vadPositiveThreshold,
vadNegativeThreshold,
vadMode,
} from "../../../settings/settings.ts";
import {
type NoiseGateParams,
@@ -451,6 +452,7 @@ export class Publisher {
vadEnabled: vadEnabled.getValue(),
vadPositiveThreshold: vadPositiveThreshold.getValue(),
vadNegativeThreshold: vadNegativeThreshold.getValue(),
vadAggressive: vadMode.getValue() === "aggressive",
});
// Attach / detach processor when any processing feature changes or the track changes.
@@ -509,12 +511,13 @@ export class Publisher {
vadEnabled.value$,
vadPositiveThreshold.value$,
vadNegativeThreshold.value$,
vadMode.value$,
])
.pipe(scope.bind())
.subscribe(([
noiseGateActive, threshold, attackMs, holdMs, releaseMs,
transientEnabled, transientThresholdDb, transientReleaseMs,
vadActive, vadPos, vadNeg,
vadActive, vadPos, vadNeg, vadModeValue,
]) => {
transformer?.updateParams({
noiseGateActive, threshold, attackMs, holdMs, releaseMs,
@@ -522,6 +525,7 @@ export class Publisher {
vadEnabled: vadActive,
vadPositiveThreshold: vadPos,
vadNegativeThreshold: vadNeg,
vadAggressive: vadModeValue === "aggressive",
});
});
}