feat: add VAD mode setting — standard vs aggressive latency
Standard: 5 ms open / 20 ms close ramp (comfortable feel).
Aggressive: 1 ms open / 5 ms close ramp (lowest possible latency).
The mode is surfaced as a radio selector in Settings → Audio → Voice activity detection, visible only while VAD is enabled. It is wired through NoiseGateParams.vadAggressive → worklet updateParams.
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -36,6 +36,7 @@ interface NoiseGateParams {
|
|||||||
vadEnabled: boolean;
|
vadEnabled: boolean;
|
||||||
vadPositiveThreshold: number; // open gate when prob >= this (0–1)
|
vadPositiveThreshold: number; // open gate when prob >= this (0–1)
|
||||||
vadNegativeThreshold: number; // close gate when prob < this (0–1)
|
vadNegativeThreshold: number; // close gate when prob < this (0–1)
|
||||||
|
vadAggressive: boolean; // true: 1 ms open / 5 ms close; false: 5 ms / 20 ms
|
||||||
}
|
}
|
||||||
|
|
||||||
interface VADGateMessage {
|
interface VADGateMessage {
|
||||||
@@ -209,9 +210,9 @@ class NoiseGateProcessor extends AudioWorkletProcessor {
|
|||||||
// VAD gate state
|
// VAD gate state
|
||||||
private vadGateOpen = true; // starts open; TEN-VAD closes it on first silent frame
|
private vadGateOpen = true; // starts open; TEN-VAD closes it on first silent frame
|
||||||
private vadAttenuation = 1.0;
|
private vadAttenuation = 1.0;
|
||||||
// Asymmetric ramp: fast open to avoid masking speech onset, slow close to de-click
|
// Asymmetric ramp rates — recomputed in updateParams based on vadAggressive
|
||||||
private readonly vadOpenRampRate = 1.0 / (0.005 * sampleRate); // 5 ms
|
private vadOpenRampRate = 1.0 / (0.005 * sampleRate); // default: 5 ms
|
||||||
private readonly vadCloseRampRate = 1.0 / (0.02 * sampleRate); // 20 ms
|
private vadCloseRampRate = 1.0 / (0.02 * sampleRate); // default: 20 ms
|
||||||
|
|
||||||
// TEN-VAD state
|
// TEN-VAD state
|
||||||
private vadEnabled = false;
|
private vadEnabled = false;
|
||||||
@@ -275,6 +276,7 @@ class NoiseGateProcessor extends AudioWorkletProcessor {
|
|||||||
vadEnabled: false,
|
vadEnabled: false,
|
||||||
vadPositiveThreshold: 0.5,
|
vadPositiveThreshold: 0.5,
|
||||||
vadNegativeThreshold: 0.3,
|
vadNegativeThreshold: 0.3,
|
||||||
|
vadAggressive: false,
|
||||||
});
|
});
|
||||||
|
|
||||||
this.port.postMessage({
|
this.port.postMessage({
|
||||||
@@ -295,6 +297,13 @@ class NoiseGateProcessor extends AudioWorkletProcessor {
|
|||||||
this.vadEnabled = p.vadEnabled ?? false;
|
this.vadEnabled = p.vadEnabled ?? false;
|
||||||
this.vadPositiveThreshold = p.vadPositiveThreshold ?? 0.5;
|
this.vadPositiveThreshold = p.vadPositiveThreshold ?? 0.5;
|
||||||
this.vadNegativeThreshold = p.vadNegativeThreshold ?? 0.3;
|
this.vadNegativeThreshold = p.vadNegativeThreshold ?? 0.3;
|
||||||
|
if (p.vadAggressive) {
|
||||||
|
this.vadOpenRampRate = 1.0 / (0.001 * sampleRate); // 1 ms — near-instant
|
||||||
|
this.vadCloseRampRate = 1.0 / (0.005 * sampleRate); // 5 ms
|
||||||
|
} else {
|
||||||
|
this.vadOpenRampRate = 1.0 / (0.005 * sampleRate); // 5 ms
|
||||||
|
this.vadCloseRampRate = 1.0 / (0.02 * sampleRate); // 20 ms
|
||||||
|
}
|
||||||
// When VAD is disabled, open the gate immediately
|
// When VAD is disabled, open the gate immediately
|
||||||
if (!this.vadEnabled) this.vadGateOpen = true;
|
if (!this.vadEnabled) this.vadGateOpen = true;
|
||||||
this.port.postMessage({
|
this.port.postMessage({
|
||||||
|
|||||||
@@ -23,6 +23,7 @@ export interface NoiseGateParams {
|
|||||||
vadEnabled: boolean;
|
vadEnabled: boolean;
|
||||||
vadPositiveThreshold: number; // open gate when isSpeech prob >= this (0–1)
|
vadPositiveThreshold: number; // open gate when isSpeech prob >= this (0–1)
|
||||||
vadNegativeThreshold: number; // close gate when isSpeech prob < this (0–1)
|
vadNegativeThreshold: number; // close gate when isSpeech prob < this (0–1)
|
||||||
|
vadAggressive: boolean; // true: 1 ms open / 5 ms close ramp; false: 5 ms / 20 ms
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|||||||
@@ -35,6 +35,7 @@ import {
|
|||||||
vadEnabled as vadEnabledSetting,
|
vadEnabled as vadEnabledSetting,
|
||||||
vadPositiveThreshold as vadPositiveThresholdSetting,
|
vadPositiveThreshold as vadPositiveThresholdSetting,
|
||||||
vadNegativeThreshold as vadNegativeThresholdSetting,
|
vadNegativeThreshold as vadNegativeThresholdSetting,
|
||||||
|
vadMode as vadModeSetting,
|
||||||
} from "./settings";
|
} from "./settings";
|
||||||
import { PreferencesSettingsTab } from "./PreferencesSettingsTab";
|
import { PreferencesSettingsTab } from "./PreferencesSettingsTab";
|
||||||
import { Slider } from "../Slider";
|
import { Slider } from "../Slider";
|
||||||
@@ -134,6 +135,7 @@ export const SettingsModal: FC<Props> = ({
|
|||||||
|
|
||||||
// Voice activity detection
|
// Voice activity detection
|
||||||
const [vadActive, setVadActive] = useSetting(vadEnabledSetting);
|
const [vadActive, setVadActive] = useSetting(vadEnabledSetting);
|
||||||
|
const [vadModeValue, setVadModeValue] = useSetting(vadModeSetting);
|
||||||
const [vadPositiveThreshold, setVadPositiveThreshold] = useSetting(vadPositiveThresholdSetting);
|
const [vadPositiveThreshold, setVadPositiveThreshold] = useSetting(vadPositiveThresholdSetting);
|
||||||
const [vadPositiveThresholdRaw, setVadPositiveThresholdRaw] = useState(vadPositiveThreshold);
|
const [vadPositiveThresholdRaw, setVadPositiveThresholdRaw] = useState(vadPositiveThreshold);
|
||||||
const [vadNegativeThreshold, setVadNegativeThreshold] = useSetting(vadNegativeThresholdSetting);
|
const [vadNegativeThreshold, setVadNegativeThreshold] = useSetting(vadNegativeThresholdSetting);
|
||||||
@@ -336,7 +338,7 @@ export const SettingsModal: FC<Props> = ({
|
|||||||
id="vadEnabled"
|
id="vadEnabled"
|
||||||
type="checkbox"
|
type="checkbox"
|
||||||
label="Enable voice activity detection"
|
label="Enable voice activity detection"
|
||||||
description="Uses TEN-VAD to mute audio when no speech is detected (~16 ms latency)."
|
description="Uses TEN-VAD to mute audio when no speech is detected (~10 ms latency)."
|
||||||
checked={vadActive}
|
checked={vadActive}
|
||||||
onChange={(e: ChangeEvent<HTMLInputElement>): void =>
|
onChange={(e: ChangeEvent<HTMLInputElement>): void =>
|
||||||
setVadActive(e.target.checked)
|
setVadActive(e.target.checked)
|
||||||
@@ -345,6 +347,28 @@ export const SettingsModal: FC<Props> = ({
|
|||||||
</FieldRow>
|
</FieldRow>
|
||||||
{vadActive && (
|
{vadActive && (
|
||||||
<>
|
<>
|
||||||
|
<FieldRow>
|
||||||
|
<InputField
|
||||||
|
id="vadModeStandard"
|
||||||
|
type="radio"
|
||||||
|
name="vadMode"
|
||||||
|
label="Standard"
|
||||||
|
description="5 ms open / 20 ms close ramp — comfortable feel."
|
||||||
|
checked={vadModeValue === "standard"}
|
||||||
|
onChange={(): void => setVadModeValue("standard")}
|
||||||
|
/>
|
||||||
|
</FieldRow>
|
||||||
|
<FieldRow>
|
||||||
|
<InputField
|
||||||
|
id="vadModeAggressive"
|
||||||
|
type="radio"
|
||||||
|
name="vadMode"
|
||||||
|
label="Aggressive"
|
||||||
|
description="1 ms open / 5 ms close ramp — lowest possible latency."
|
||||||
|
checked={vadModeValue === "aggressive"}
|
||||||
|
onChange={(): void => setVadModeValue("aggressive")}
|
||||||
|
/>
|
||||||
|
</FieldRow>
|
||||||
<div className={`${styles.volumeSlider} ${styles.thresholdSlider}`}>
|
<div className={`${styles.volumeSlider} ${styles.thresholdSlider}`}>
|
||||||
<span className={styles.sliderLabel}>Open threshold: {Math.round(vadPositiveThresholdRaw * 100)}%</span>
|
<span className={styles.sliderLabel}>Open threshold: {Math.round(vadPositiveThresholdRaw * 100)}%</span>
|
||||||
<p>How confident the model must be before opening the gate.</p>
|
<p>How confident the model must be before opening the gate.</p>
|
||||||
|
|||||||
@@ -150,6 +150,8 @@ export const vadEnabled = new Setting<boolean>("vad-enabled", false);
|
|||||||
export const vadPositiveThreshold = new Setting<number>("vad-positive-threshold", 0.2);
|
export const vadPositiveThreshold = new Setting<number>("vad-positive-threshold", 0.2);
|
||||||
// Probability below which the VAD closes the gate (0–1)
|
// Probability below which the VAD closes the gate (0–1)
|
||||||
export const vadNegativeThreshold = new Setting<number>("vad-negative-threshold", 0.1);
|
export const vadNegativeThreshold = new Setting<number>("vad-negative-threshold", 0.1);
|
||||||
|
// VAD gate ramp mode — "standard": 5 ms open / 20 ms close ramp; "aggressive": 1 ms open / 5 ms close ramp
|
||||||
|
export const vadMode = new Setting<"standard" | "aggressive">("vad-mode", "standard");
|
||||||
|
|
||||||
export const transientSuppressorEnabled = new Setting<boolean>(
|
export const transientSuppressorEnabled = new Setting<boolean>(
|
||||||
"transient-suppressor-enabled",
|
"transient-suppressor-enabled",
|
||||||
|
|||||||
@@ -44,6 +44,7 @@ import {
|
|||||||
vadEnabled,
|
vadEnabled,
|
||||||
vadPositiveThreshold,
|
vadPositiveThreshold,
|
||||||
vadNegativeThreshold,
|
vadNegativeThreshold,
|
||||||
|
vadMode,
|
||||||
} from "../../../settings/settings.ts";
|
} from "../../../settings/settings.ts";
|
||||||
import {
|
import {
|
||||||
type NoiseGateParams,
|
type NoiseGateParams,
|
||||||
@@ -451,6 +452,7 @@ export class Publisher {
|
|||||||
vadEnabled: vadEnabled.getValue(),
|
vadEnabled: vadEnabled.getValue(),
|
||||||
vadPositiveThreshold: vadPositiveThreshold.getValue(),
|
vadPositiveThreshold: vadPositiveThreshold.getValue(),
|
||||||
vadNegativeThreshold: vadNegativeThreshold.getValue(),
|
vadNegativeThreshold: vadNegativeThreshold.getValue(),
|
||||||
|
vadAggressive: vadMode.getValue() === "aggressive",
|
||||||
});
|
});
|
||||||
|
|
||||||
// Attach / detach processor when any processing feature changes or the track changes.
|
// Attach / detach processor when any processing feature changes or the track changes.
|
||||||
@@ -509,12 +511,13 @@ export class Publisher {
|
|||||||
vadEnabled.value$,
|
vadEnabled.value$,
|
||||||
vadPositiveThreshold.value$,
|
vadPositiveThreshold.value$,
|
||||||
vadNegativeThreshold.value$,
|
vadNegativeThreshold.value$,
|
||||||
|
vadMode.value$,
|
||||||
])
|
])
|
||||||
.pipe(scope.bind())
|
.pipe(scope.bind())
|
||||||
.subscribe(([
|
.subscribe(([
|
||||||
noiseGateActive, threshold, attackMs, holdMs, releaseMs,
|
noiseGateActive, threshold, attackMs, holdMs, releaseMs,
|
||||||
transientEnabled, transientThresholdDb, transientReleaseMs,
|
transientEnabled, transientThresholdDb, transientReleaseMs,
|
||||||
vadActive, vadPos, vadNeg,
|
vadActive, vadPos, vadNeg, vadModeValue,
|
||||||
]) => {
|
]) => {
|
||||||
transformer?.updateParams({
|
transformer?.updateParams({
|
||||||
noiseGateActive, threshold, attackMs, holdMs, releaseMs,
|
noiseGateActive, threshold, attackMs, holdMs, releaseMs,
|
||||||
@@ -522,6 +525,7 @@ export class Publisher {
|
|||||||
vadEnabled: vadActive,
|
vadEnabled: vadActive,
|
||||||
vadPositiveThreshold: vadPos,
|
vadPositiveThreshold: vadPos,
|
||||||
vadNegativeThreshold: vadNeg,
|
vadNegativeThreshold: vadNeg,
|
||||||
|
vadAggressive: vadModeValue === "aggressive",
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user