feat: add VAD mode setting — standard vs aggressive latency
Standard: 5 ms open / 20 ms close ramp (comfortable feel). Aggressive: 1 ms open / 5 ms close ramp (lowest possible latency). The mode is surfaced as a radio selector in Settings → Audio → Voice activity detection, visible while VAD is enabled. Wired through NoiseGateParams.vadAggressive → worklet updateParams. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -36,6 +36,7 @@ interface NoiseGateParams {
|
||||
vadEnabled: boolean;
|
||||
vadPositiveThreshold: number; // open gate when prob >= this (0–1)
|
||||
vadNegativeThreshold: number; // close gate when prob < this (0–1)
|
||||
vadAggressive: boolean; // true: 1 ms open / 5 ms close; false: 5 ms / 20 ms
|
||||
}
|
||||
|
||||
interface VADGateMessage {
|
||||
@@ -209,9 +210,9 @@ class NoiseGateProcessor extends AudioWorkletProcessor {
|
||||
// VAD gate state
|
||||
private vadGateOpen = true; // starts open; TEN-VAD closes it on first silent frame
|
||||
private vadAttenuation = 1.0;
|
||||
// Asymmetric ramp: fast open to avoid masking speech onset, slow close to de-click
|
||||
private readonly vadOpenRampRate = 1.0 / (0.005 * sampleRate); // 5 ms
|
||||
private readonly vadCloseRampRate = 1.0 / (0.02 * sampleRate); // 20 ms
|
||||
// Asymmetric ramp rates — recomputed in updateParams based on vadAggressive
|
||||
private vadOpenRampRate = 1.0 / (0.005 * sampleRate); // default: 5 ms
|
||||
private vadCloseRampRate = 1.0 / (0.02 * sampleRate); // default: 20 ms
|
||||
|
||||
// TEN-VAD state
|
||||
private vadEnabled = false;
|
||||
@@ -275,6 +276,7 @@ class NoiseGateProcessor extends AudioWorkletProcessor {
|
||||
vadEnabled: false,
|
||||
vadPositiveThreshold: 0.5,
|
||||
vadNegativeThreshold: 0.3,
|
||||
vadAggressive: false,
|
||||
});
|
||||
|
||||
this.port.postMessage({
|
||||
@@ -295,6 +297,13 @@ class NoiseGateProcessor extends AudioWorkletProcessor {
|
||||
this.vadEnabled = p.vadEnabled ?? false;
|
||||
this.vadPositiveThreshold = p.vadPositiveThreshold ?? 0.5;
|
||||
this.vadNegativeThreshold = p.vadNegativeThreshold ?? 0.3;
|
||||
if (p.vadAggressive) {
|
||||
this.vadOpenRampRate = 1.0 / (0.001 * sampleRate); // 1 ms — near-instant
|
||||
this.vadCloseRampRate = 1.0 / (0.005 * sampleRate); // 5 ms
|
||||
} else {
|
||||
this.vadOpenRampRate = 1.0 / (0.005 * sampleRate); // 5 ms
|
||||
this.vadCloseRampRate = 1.0 / (0.02 * sampleRate); // 20 ms
|
||||
}
|
||||
// When VAD is disabled, open the gate immediately
|
||||
if (!this.vadEnabled) this.vadGateOpen = true;
|
||||
this.port.postMessage({
|
||||
|
||||
@@ -23,6 +23,7 @@ export interface NoiseGateParams {
|
||||
vadEnabled: boolean;
|
||||
vadPositiveThreshold: number; // open gate when isSpeech prob >= this (0–1)
|
||||
vadNegativeThreshold: number; // close gate when isSpeech prob < this (0–1)
|
||||
vadAggressive: boolean; // true: 1 ms open / 5 ms close ramp; false: 5 ms / 20 ms
|
||||
}
|
||||
|
||||
/**
|
||||
|
||||
@@ -35,6 +35,7 @@ import {
|
||||
vadEnabled as vadEnabledSetting,
|
||||
vadPositiveThreshold as vadPositiveThresholdSetting,
|
||||
vadNegativeThreshold as vadNegativeThresholdSetting,
|
||||
vadMode as vadModeSetting,
|
||||
} from "./settings";
|
||||
import { PreferencesSettingsTab } from "./PreferencesSettingsTab";
|
||||
import { Slider } from "../Slider";
|
||||
@@ -134,6 +135,7 @@ export const SettingsModal: FC<Props> = ({
|
||||
|
||||
// Voice activity detection
|
||||
const [vadActive, setVadActive] = useSetting(vadEnabledSetting);
|
||||
const [vadModeValue, setVadModeValue] = useSetting(vadModeSetting);
|
||||
const [vadPositiveThreshold, setVadPositiveThreshold] = useSetting(vadPositiveThresholdSetting);
|
||||
const [vadPositiveThresholdRaw, setVadPositiveThresholdRaw] = useState(vadPositiveThreshold);
|
||||
const [vadNegativeThreshold, setVadNegativeThreshold] = useSetting(vadNegativeThresholdSetting);
|
||||
@@ -336,7 +338,7 @@ export const SettingsModal: FC<Props> = ({
|
||||
id="vadEnabled"
|
||||
type="checkbox"
|
||||
label="Enable voice activity detection"
|
||||
description="Uses TEN-VAD to mute audio when no speech is detected (~16 ms latency)."
|
||||
description="Uses TEN-VAD to mute audio when no speech is detected (~10 ms latency)."
|
||||
checked={vadActive}
|
||||
onChange={(e: ChangeEvent<HTMLInputElement>): void =>
|
||||
setVadActive(e.target.checked)
|
||||
@@ -345,6 +347,28 @@ export const SettingsModal: FC<Props> = ({
|
||||
</FieldRow>
|
||||
{vadActive && (
|
||||
<>
|
||||
<FieldRow>
|
||||
<InputField
|
||||
id="vadModeStandard"
|
||||
type="radio"
|
||||
name="vadMode"
|
||||
label="Standard"
|
||||
description="5 ms open / 20 ms close ramp — comfortable feel."
|
||||
checked={vadModeValue === "standard"}
|
||||
onChange={(): void => setVadModeValue("standard")}
|
||||
/>
|
||||
</FieldRow>
|
||||
<FieldRow>
|
||||
<InputField
|
||||
id="vadModeAggressive"
|
||||
type="radio"
|
||||
name="vadMode"
|
||||
label="Aggressive"
|
||||
description="1 ms open / 5 ms close ramp — lowest possible latency."
|
||||
checked={vadModeValue === "aggressive"}
|
||||
onChange={(): void => setVadModeValue("aggressive")}
|
||||
/>
|
||||
</FieldRow>
|
||||
<div className={`${styles.volumeSlider} ${styles.thresholdSlider}`}>
|
||||
<span className={styles.sliderLabel}>Open threshold: {Math.round(vadPositiveThresholdRaw * 100)}%</span>
|
||||
<p>How confident the model must be before opening the gate.</p>
|
||||
|
||||
@@ -150,6 +150,8 @@ export const vadEnabled = new Setting<boolean>("vad-enabled", false);
|
||||
export const vadPositiveThreshold = new Setting<number>("vad-positive-threshold", 0.2);
|
||||
// Probability below which the VAD closes the gate (0–1)
|
||||
export const vadNegativeThreshold = new Setting<number>("vad-negative-threshold", 0.1);
|
||||
// standard: 5 ms open / 20 ms close ramp; aggressive: 1 ms open / 5 ms close ramp
|
||||
export const vadMode = new Setting<"standard" | "aggressive">("vad-mode", "standard");
|
||||
|
||||
export const transientSuppressorEnabled = new Setting<boolean>(
|
||||
"transient-suppressor-enabled",
|
||||
|
||||
@@ -44,6 +44,7 @@ import {
|
||||
vadEnabled,
|
||||
vadPositiveThreshold,
|
||||
vadNegativeThreshold,
|
||||
vadMode,
|
||||
} from "../../../settings/settings.ts";
|
||||
import {
|
||||
type NoiseGateParams,
|
||||
@@ -451,6 +452,7 @@ export class Publisher {
|
||||
vadEnabled: vadEnabled.getValue(),
|
||||
vadPositiveThreshold: vadPositiveThreshold.getValue(),
|
||||
vadNegativeThreshold: vadNegativeThreshold.getValue(),
|
||||
vadAggressive: vadMode.getValue() === "aggressive",
|
||||
});
|
||||
|
||||
// Attach / detach processor when any processing feature changes or the track changes.
|
||||
@@ -509,12 +511,13 @@ export class Publisher {
|
||||
vadEnabled.value$,
|
||||
vadPositiveThreshold.value$,
|
||||
vadNegativeThreshold.value$,
|
||||
vadMode.value$,
|
||||
])
|
||||
.pipe(scope.bind())
|
||||
.subscribe(([
|
||||
noiseGateActive, threshold, attackMs, holdMs, releaseMs,
|
||||
transientEnabled, transientThresholdDb, transientReleaseMs,
|
||||
vadActive, vadPos, vadNeg,
|
||||
vadActive, vadPos, vadNeg, vadModeValue,
|
||||
]) => {
|
||||
transformer?.updateParams({
|
||||
noiseGateActive, threshold, attackMs, holdMs, releaseMs,
|
||||
@@ -522,6 +525,7 @@ export class Publisher {
|
||||
vadEnabled: vadActive,
|
||||
vadPositiveThreshold: vadPos,
|
||||
vadNegativeThreshold: vadNeg,
|
||||
vadAggressive: vadModeValue === "aggressive",
|
||||
});
|
||||
});
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user