5 Commits

Author SHA1 Message Date
mk
ea44b04bb3 fix: use ?worker&url import so Vite compiles the worklet TypeScript
new URL('./file.ts', import.meta.url) copies the file verbatim — the
browser gets raw TypeScript and addModule() throws DOMException. Using
?worker&url tells Vite to bundle and compile the file, producing a .js
output that the browser can actually execute.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-03-24 22:03:25 -03:00
mk
c42274a511 fix: tighten spacing between VAD enable checkbox and mode radio buttons
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-03-24 08:00:14 -03:00
mk
9fc9655dbb fix: proper radio buttons for VAD mode, standard=16ms/aggressive=10ms
- Use compound-web Form/InlineField/RadioControl/Label/HelpMessage for
  VAD mode selection (proper radio button rendering)
- Standard mode: 256 samples / 16 ms hop + 5 ms open / 20 ms close ramp
- Aggressive mode: 160 samples / 10 ms hop + 1 ms open / 5 ms close ramp
- Worklet stores WebAssembly.Module and recreates TenVADRuntime with the
  correct hop size whenever the mode changes

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-03-24 07:52:52 -03:00
mk
e95e613c08 feat: add VAD mode setting — standard vs aggressive latency
Standard: 5 ms open / 20 ms close ramp (comfortable feel)
Aggressive: 1 ms open / 5 ms close ramp (lowest possible latency)

The mode is surfaced as a radio selector in Settings → Audio → Voice
activity detection, visible while VAD is enabled. Wired through
NoiseGateParams.vadAggressive → worklet updateParams.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-03-24 07:48:41 -03:00
mk
025735c490 perf: reduce TEN-VAD latency from 16 ms to 10 ms, asymmetric gate ramp
- Hop size 256 → 160 samples @ 16 kHz: VAD decision every 10 ms instead
  of 16 ms (minimum supported by TEN-VAD)
- Asymmetric VAD ramp: 5 ms open (was 20 ms) to avoid masking speech onset,
  20 ms close retained for de-click on silence

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-03-24 07:44:47 -03:00
5 changed files with 100 additions and 21 deletions

View File

@@ -36,6 +36,7 @@ interface NoiseGateParams {
vadEnabled: boolean; vadEnabled: boolean;
vadPositiveThreshold: number; // open gate when prob >= this (0–1) vadPositiveThreshold: number; // open gate when prob >= this (0–1)
vadNegativeThreshold: number; // close gate when prob < this (0–1) vadNegativeThreshold: number; // close gate when prob < this (0–1)
vadAggressive: boolean; // true: 1 ms open / 5 ms close; false: 5 ms / 20 ms
} }
interface VADGateMessage { interface VADGateMessage {
@@ -182,8 +183,10 @@ class TenVADRuntime {
* gain is instantly cut to 0 and releases over transientReleaseMs. * gain is instantly cut to 0 and releases over transientReleaseMs.
* *
* TEN-VAD gate: accumulates audio with 3:1 decimation (48 kHz → 16 kHz), * TEN-VAD gate: accumulates audio with 3:1 decimation (48 kHz → 16 kHz),
* runs the TEN-VAD model synchronously every 256 samples (16 ms), and * runs the TEN-VAD model synchronously every 160 samples (10 ms), and
* controls vadGateOpen with hysteresis. No IPC round-trip required. * controls vadGateOpen with hysteresis. No IPC round-trip required.
* Asymmetric ramp: 5 ms open (minimise speech onset masking), 20 ms close
* (de-click on silence).
*/ */
class NoiseGateProcessor extends AudioWorkletProcessor { class NoiseGateProcessor extends AudioWorkletProcessor {
// Noise gate state // Noise gate state
@@ -207,18 +210,24 @@ class NoiseGateProcessor extends AudioWorkletProcessor {
// VAD gate state // VAD gate state
private vadGateOpen = true; // starts open; TEN-VAD closes it on first silent frame private vadGateOpen = true; // starts open; TEN-VAD closes it on first silent frame
private vadAttenuation = 1.0; private vadAttenuation = 1.0;
private readonly vadRampRate = 1.0 / (0.02 * sampleRate); // Asymmetric ramp rates — recomputed in updateParams based on vadAggressive
private vadOpenRampRate = 1.0 / (0.005 * sampleRate); // default: 5 ms
private vadCloseRampRate = 1.0 / (0.02 * sampleRate); // default: 20 ms
// TEN-VAD state // TEN-VAD state
private vadEnabled = false; private vadEnabled = false;
private vadPositiveThreshold = 0.5; private vadPositiveThreshold = 0.5;
private vadNegativeThreshold = 0.3; private vadNegativeThreshold = 0.3;
private vadAggressive = false;
private tenVadRuntime: TenVADRuntime | null = null; private tenVadRuntime: TenVADRuntime | null = null;
private tenVadModule: WebAssembly.Module | undefined = undefined;
// 3:1 decimation from AudioContext sample rate to 16 kHz // 3:1 decimation from AudioContext sample rate to 16 kHz
private readonly decRatio = Math.max(1, Math.round(sampleRate / 16000)); private readonly decRatio = Math.max(1, Math.round(sampleRate / 16000));
private decPhase = 0; private decPhase = 0;
private decAcc = 0; private decAcc = 0;
// Buffer sized for max hop (256); vadHopSize tracks how many samples to collect
private readonly vadHopBuf = new Int16Array(256); private readonly vadHopBuf = new Int16Array(256);
private vadHopSize = 256; // standard: 256 (16 ms), aggressive: 160 (10 ms)
private vadHopCount = 0; private vadHopCount = 0;
private logCounter = 0; private logCounter = 0;
@@ -229,13 +238,13 @@ class NoiseGateProcessor extends AudioWorkletProcessor {
super(options); super(options);
// Try to instantiate TEN-VAD from the pre-compiled module passed by the main thread // Try to instantiate TEN-VAD from the pre-compiled module passed by the main thread
const tenVadModule = options?.processorOptions?.tenVadModule as this.tenVadModule = options?.processorOptions?.tenVadModule as
| WebAssembly.Module | WebAssembly.Module
| undefined; | undefined;
if (tenVadModule) { if (this.tenVadModule) {
try { try {
// hopSize = 256 samples @ 16 kHz = 16 ms; threshold = 0.5 (overridden via params) // Default: standard mode 256 samples @ 16 kHz = 16 ms
this.tenVadRuntime = new TenVADRuntime(tenVadModule, 256, 0.5); this.tenVadRuntime = new TenVADRuntime(this.tenVadModule, 256, 0.5);
this.port.postMessage({ this.port.postMessage({
type: "log", type: "log",
msg: "[NoiseGate worklet] TEN-VAD runtime initialized, decRatio=" + this.decRatio, msg: "[NoiseGate worklet] TEN-VAD runtime initialized, decRatio=" + this.decRatio,
@@ -270,6 +279,7 @@ class NoiseGateProcessor extends AudioWorkletProcessor {
vadEnabled: false, vadEnabled: false,
vadPositiveThreshold: 0.5, vadPositiveThreshold: 0.5,
vadNegativeThreshold: 0.3, vadNegativeThreshold: 0.3,
vadAggressive: false,
}); });
this.port.postMessage({ this.port.postMessage({
@@ -290,6 +300,28 @@ class NoiseGateProcessor extends AudioWorkletProcessor {
this.vadEnabled = p.vadEnabled ?? false; this.vadEnabled = p.vadEnabled ?? false;
this.vadPositiveThreshold = p.vadPositiveThreshold ?? 0.5; this.vadPositiveThreshold = p.vadPositiveThreshold ?? 0.5;
this.vadNegativeThreshold = p.vadNegativeThreshold ?? 0.3; this.vadNegativeThreshold = p.vadNegativeThreshold ?? 0.3;
const newAggressive = p.vadAggressive ?? false;
if (newAggressive) {
this.vadOpenRampRate = 1.0 / (0.001 * sampleRate); // 1 ms — near-instant
this.vadCloseRampRate = 1.0 / (0.005 * sampleRate); // 5 ms
} else {
this.vadOpenRampRate = 1.0 / (0.005 * sampleRate); // 5 ms
this.vadCloseRampRate = 1.0 / (0.02 * sampleRate); // 20 ms
}
// Recreate runtime if mode changed (hop size differs between standard/aggressive)
const newHopSize = newAggressive ? 160 : 256;
if (newAggressive !== this.vadAggressive && this.tenVadModule) {
this.tenVadRuntime?.destroy();
this.tenVadRuntime = null;
this.vadHopCount = 0;
try {
this.tenVadRuntime = new TenVADRuntime(this.tenVadModule, newHopSize, 0.5);
} catch (e) {
this.port.postMessage({ type: "log", msg: "[NoiseGate worklet] TEN-VAD recreate failed: " + String(e) });
}
}
this.vadAggressive = newAggressive;
this.vadHopSize = newHopSize;
// When VAD is disabled, open the gate immediately // When VAD is disabled, open the gate immediately
if (!this.vadEnabled) this.vadGateOpen = true; if (!this.vadEnabled) this.vadGateOpen = true;
this.port.postMessage({ this.port.postMessage({
@@ -382,7 +414,7 @@ class NoiseGateProcessor extends AudioWorkletProcessor {
: (avg * 32767 + 0.5) | 0; : (avg * 32767 + 0.5) | 0;
this.vadHopBuf[this.vadHopCount++] = s16; this.vadHopBuf[this.vadHopCount++] = s16;
if (this.vadHopCount >= 256) { if (this.vadHopCount >= this.vadHopSize) {
this.vadHopCount = 0; this.vadHopCount = 0;
const prob = this.tenVadRuntime.process(this.vadHopBuf); const prob = this.tenVadRuntime.process(this.vadHopBuf);
if (!this.vadGateOpen && prob >= this.vadPositiveThreshold) { if (!this.vadGateOpen && prob >= this.vadPositiveThreshold) {
@@ -394,17 +426,18 @@ class NoiseGateProcessor extends AudioWorkletProcessor {
} }
} }
// Ramp VAD attenuation toward target to avoid clicks // Asymmetric ramp: fast open (5 ms) to minimise speech onset masking,
// slow close (20 ms) to de-click on silence transitions.
const vadTarget = this.vadGateOpen ? 1.0 : 0.0; const vadTarget = this.vadGateOpen ? 1.0 : 0.0;
if (this.vadAttenuation < vadTarget) { if (this.vadAttenuation < vadTarget) {
this.vadAttenuation = Math.min( this.vadAttenuation = Math.min(
vadTarget, vadTarget,
this.vadAttenuation + this.vadRampRate, this.vadAttenuation + this.vadOpenRampRate,
); );
} else if (this.vadAttenuation > vadTarget) { } else if (this.vadAttenuation > vadTarget) {
this.vadAttenuation = Math.max( this.vadAttenuation = Math.max(
vadTarget, vadTarget,
this.vadAttenuation - this.vadRampRate, this.vadAttenuation - this.vadCloseRampRate,
); );
} }

View File

@@ -7,6 +7,9 @@ Please see LICENSE in the repository root for full details.
import { type Track } from "livekit-client"; import { type Track } from "livekit-client";
import { logger } from "matrix-js-sdk/lib/logger"; import { logger } from "matrix-js-sdk/lib/logger";
// ?worker&url tells Vite to compile the TypeScript worklet and return its URL.
// Without this, Vite copies the .ts file verbatim and the browser rejects it.
import compiledWorkletUrl from "./NoiseGateProcessor.worklet.ts?worker&url";
const log = logger.getChild("[NoiseGateTransformer]"); const log = logger.getChild("[NoiseGateTransformer]");
@@ -23,6 +26,7 @@ export interface NoiseGateParams {
vadEnabled: boolean; vadEnabled: boolean;
vadPositiveThreshold: number; // open gate when isSpeech prob >= this (0–1) vadPositiveThreshold: number; // open gate when isSpeech prob >= this (0–1)
vadNegativeThreshold: number; // close gate when isSpeech prob < this (0–1) vadNegativeThreshold: number; // close gate when isSpeech prob < this (0–1)
vadAggressive: boolean; // true: 1 ms open / 5 ms close ramp; false: 5 ms / 20 ms
} }
/** /**
@@ -105,12 +109,8 @@ export class NoiseGateTransformer implements AudioTrackProcessor {
log.warn("TEN-VAD WASM module unavailable — VAD disabled:", e); log.warn("TEN-VAD WASM module unavailable — VAD disabled:", e);
} }
const workletUrl = new URL( log.info("loading worklet from:", compiledWorkletUrl);
"./NoiseGateProcessor.worklet.ts", await audioContext.audioWorklet.addModule(compiledWorkletUrl);
import.meta.url,
);
log.info("loading worklet from:", workletUrl.href);
await audioContext.audioWorklet.addModule(workletUrl);
log.info("worklet module loaded"); log.info("worklet module loaded");
this.workletNode = new AudioWorkletNode( this.workletNode = new AudioWorkletNode(

View File

@@ -5,10 +5,19 @@ SPDX-License-Identifier: AGPL-3.0-only OR LicenseRef-Element-Commercial
Please see LICENSE in the repository root for full details. Please see LICENSE in the repository root for full details.
*/ */
import { type ChangeEvent, type FC, type ReactNode, useEffect, useState, useCallback } from "react"; import { type ChangeEvent, type FC, type ReactNode, useEffect, useId, useState, useCallback } from "react";
import { useTranslation } from "react-i18next"; import { useTranslation } from "react-i18next";
import { type MatrixClient } from "matrix-js-sdk"; import { type MatrixClient } from "matrix-js-sdk";
import { Button, Heading, Root as Form, Separator } from "@vector-im/compound-web"; import {
Button,
Heading,
HelpMessage,
InlineField,
Label,
RadioControl,
Root as Form,
Separator,
} from "@vector-im/compound-web";
import { type Room as LivekitRoom } from "livekit-client"; import { type Room as LivekitRoom } from "livekit-client";
import { Modal } from "../Modal"; import { Modal } from "../Modal";
@@ -35,6 +44,7 @@ import {
vadEnabled as vadEnabledSetting, vadEnabled as vadEnabledSetting,
vadPositiveThreshold as vadPositiveThresholdSetting, vadPositiveThreshold as vadPositiveThresholdSetting,
vadNegativeThreshold as vadNegativeThresholdSetting, vadNegativeThreshold as vadNegativeThresholdSetting,
vadMode as vadModeSetting,
} from "./settings"; } from "./settings";
import { PreferencesSettingsTab } from "./PreferencesSettingsTab"; import { PreferencesSettingsTab } from "./PreferencesSettingsTab";
import { Slider } from "../Slider"; import { Slider } from "../Slider";
@@ -133,7 +143,9 @@ export const SettingsModal: FC<Props> = ({
const [showAdvancedGate, setShowAdvancedGate] = useState(false); const [showAdvancedGate, setShowAdvancedGate] = useState(false);
// Voice activity detection // Voice activity detection
const vadModeRadioGroup = useId();
const [vadActive, setVadActive] = useSetting(vadEnabledSetting); const [vadActive, setVadActive] = useSetting(vadEnabledSetting);
const [vadModeValue, setVadModeValue] = useSetting(vadModeSetting);
const [vadPositiveThreshold, setVadPositiveThreshold] = useSetting(vadPositiveThresholdSetting); const [vadPositiveThreshold, setVadPositiveThreshold] = useSetting(vadPositiveThresholdSetting);
const [vadPositiveThresholdRaw, setVadPositiveThresholdRaw] = useState(vadPositiveThreshold); const [vadPositiveThresholdRaw, setVadPositiveThresholdRaw] = useState(vadPositiveThreshold);
const [vadNegativeThreshold, setVadNegativeThreshold] = useSetting(vadNegativeThresholdSetting); const [vadNegativeThreshold, setVadNegativeThreshold] = useSetting(vadNegativeThresholdSetting);
@@ -336,7 +348,7 @@ export const SettingsModal: FC<Props> = ({
id="vadEnabled" id="vadEnabled"
type="checkbox" type="checkbox"
label="Enable voice activity detection" label="Enable voice activity detection"
description="Uses TEN-VAD to mute audio when no speech is detected (~16 ms latency)." description="Uses TEN-VAD to mute audio when no speech is detected (~10 ms latency)."
checked={vadActive} checked={vadActive}
onChange={(e: ChangeEvent<HTMLInputElement>): void => onChange={(e: ChangeEvent<HTMLInputElement>): void =>
setVadActive(e.target.checked) setVadActive(e.target.checked)
@@ -345,7 +357,35 @@ export const SettingsModal: FC<Props> = ({
</FieldRow> </FieldRow>
{vadActive && ( {vadActive && (
<> <>
<div className={`${styles.volumeSlider} ${styles.thresholdSlider}`}> <Form style={{ marginTop: "-16px" }}>
<InlineField
name={vadModeRadioGroup}
control={
<RadioControl
checked={vadModeValue === "standard"}
value="standard"
onChange={(): void => setVadModeValue("standard")}
/>
}
>
<Label>Standard</Label>
<HelpMessage>256 samples / 16 ms comfortable feel.</HelpMessage>
</InlineField>
<InlineField
name={vadModeRadioGroup}
control={
<RadioControl
checked={vadModeValue === "aggressive"}
value="aggressive"
onChange={(): void => setVadModeValue("aggressive")}
/>
}
>
<Label>Aggressive</Label>
<HelpMessage>160 samples / 10 ms lowest possible latency.</HelpMessage>
</InlineField>
</Form>
<div style={{ marginTop: "16px" }} className={`${styles.volumeSlider} ${styles.thresholdSlider}`}>
<span className={styles.sliderLabel}>Open threshold: {Math.round(vadPositiveThresholdRaw * 100)}%</span> <span className={styles.sliderLabel}>Open threshold: {Math.round(vadPositiveThresholdRaw * 100)}%</span>
<p>How confident the model must be before opening the gate.</p> <p>How confident the model must be before opening the gate.</p>
<Slider <Slider

View File

@@ -150,6 +150,8 @@ export const vadEnabled = new Setting<boolean>("vad-enabled", false);
export const vadPositiveThreshold = new Setting<number>("vad-positive-threshold", 0.2); export const vadPositiveThreshold = new Setting<number>("vad-positive-threshold", 0.2);
// Probability below which the VAD closes the gate (0–1) // Probability below which the VAD closes the gate (0–1)
export const vadNegativeThreshold = new Setting<number>("vad-negative-threshold", 0.1); export const vadNegativeThreshold = new Setting<number>("vad-negative-threshold", 0.1);
// standard: 5 ms open / 20 ms close ramp; aggressive: 1 ms open / 5 ms close ramp
export const vadMode = new Setting<"standard" | "aggressive">("vad-mode", "standard");
export const transientSuppressorEnabled = new Setting<boolean>( export const transientSuppressorEnabled = new Setting<boolean>(
"transient-suppressor-enabled", "transient-suppressor-enabled",

View File

@@ -44,6 +44,7 @@ import {
vadEnabled, vadEnabled,
vadPositiveThreshold, vadPositiveThreshold,
vadNegativeThreshold, vadNegativeThreshold,
vadMode,
} from "../../../settings/settings.ts"; } from "../../../settings/settings.ts";
import { import {
type NoiseGateParams, type NoiseGateParams,
@@ -451,6 +452,7 @@ export class Publisher {
vadEnabled: vadEnabled.getValue(), vadEnabled: vadEnabled.getValue(),
vadPositiveThreshold: vadPositiveThreshold.getValue(), vadPositiveThreshold: vadPositiveThreshold.getValue(),
vadNegativeThreshold: vadNegativeThreshold.getValue(), vadNegativeThreshold: vadNegativeThreshold.getValue(),
vadAggressive: vadMode.getValue() === "aggressive",
}); });
// Attach / detach processor when any processing feature changes or the track changes. // Attach / detach processor when any processing feature changes or the track changes.
@@ -509,12 +511,13 @@ export class Publisher {
vadEnabled.value$, vadEnabled.value$,
vadPositiveThreshold.value$, vadPositiveThreshold.value$,
vadNegativeThreshold.value$, vadNegativeThreshold.value$,
vadMode.value$,
]) ])
.pipe(scope.bind()) .pipe(scope.bind())
.subscribe(([ .subscribe(([
noiseGateActive, threshold, attackMs, holdMs, releaseMs, noiseGateActive, threshold, attackMs, holdMs, releaseMs,
transientEnabled, transientThresholdDb, transientReleaseMs, transientEnabled, transientThresholdDb, transientReleaseMs,
vadActive, vadPos, vadNeg, vadActive, vadPos, vadNeg, vadModeValue,
]) => { ]) => {
transformer?.updateParams({ transformer?.updateParams({
noiseGateActive, threshold, attackMs, holdMs, releaseMs, noiseGateActive, threshold, attackMs, holdMs, releaseMs,
@@ -522,6 +525,7 @@ export class Publisher {
vadEnabled: vadActive, vadEnabled: vadActive,
vadPositiveThreshold: vadPos, vadPositiveThreshold: vadPos,
vadNegativeThreshold: vadNeg, vadNegativeThreshold: vadNeg,
vadAggressive: vadModeValue === "aggressive",
}); });
}); });
} }