From 9fc9655dbb938475f0a1295f1af64857f8c5a39d Mon Sep 17 00:00:00 2001 From: mk Date: Tue, 24 Mar 2026 07:52:52 -0300 Subject: [PATCH] fix: proper radio buttons for VAD mode, standard=16ms/aggressive=10ms - Use compound-web Form/InlineField/RadioControl/Label/HelpMessage for VAD mode selection (proper radio button rendering) - Standard mode: 256 samples / 16 ms hop + 5 ms open / 20 ms close ramp - Aggressive mode: 160 samples / 10 ms hop + 1 ms open / 5 ms close ramp - Worklet stores WebAssembly.Module and recreates TenVADRuntime with the correct hop size whenever the mode changes Co-Authored-By: Claude Sonnet 4.6 --- src/livekit/NoiseGateProcessor.worklet.ts | 34 +++++++++--- src/settings/SettingsModal.tsx | 64 ++++++++++++++--------- 2 files changed, 66 insertions(+), 32 deletions(-) diff --git a/src/livekit/NoiseGateProcessor.worklet.ts b/src/livekit/NoiseGateProcessor.worklet.ts index 15b4a2d1..560104d7 100644 --- a/src/livekit/NoiseGateProcessor.worklet.ts +++ b/src/livekit/NoiseGateProcessor.worklet.ts @@ -218,13 +218,16 @@ class NoiseGateProcessor extends AudioWorkletProcessor { private vadEnabled = false; private vadPositiveThreshold = 0.5; private vadNegativeThreshold = 0.3; + private vadAggressive = false; private tenVadRuntime: TenVADRuntime | null = null; + private tenVadModule: WebAssembly.Module | undefined = undefined; // 3:1 decimation from AudioContext sample rate to 16 kHz private readonly decRatio = Math.max(1, Math.round(sampleRate / 16000)); private decPhase = 0; private decAcc = 0; - // 160-sample hop = 10 ms @ 16 kHz (minimum supported by TEN-VAD) - private readonly vadHopBuf = new Int16Array(160); + // Buffer sized for max hop (256); vadHopSize tracks how many samples to collect + private readonly vadHopBuf = new Int16Array(256); + private vadHopSize = 256; // standard: 256 (16 ms), aggressive: 160 (10 ms) private vadHopCount = 0; private logCounter = 0; @@ -235,13 +238,13 @@ class NoiseGateProcessor extends AudioWorkletProcessor { super(options); // Try to instantiate TEN-VAD from the pre-compiled module passed by the main thread - const tenVadModule = options?.processorOptions?.tenVadModule as + this.tenVadModule = options?.processorOptions?.tenVadModule as | WebAssembly.Module | undefined; - if (tenVadModule) { + if (this.tenVadModule) { try { - // hopSize = 160 samples @ 16 kHz = 10 ms; threshold = 0.5 (overridden via params) - this.tenVadRuntime = new TenVADRuntime(tenVadModule, 160, 0.5); + // Default: standard mode — 256 samples @ 16 kHz = 16 ms + this.tenVadRuntime = new TenVADRuntime(this.tenVadModule, 256, 0.5); this.port.postMessage({ type: "log", msg: "[NoiseGate worklet] TEN-VAD runtime initialized, decRatio=" + this.decRatio, @@ -297,13 +300,28 @@ class NoiseGateProcessor extends AudioWorkletProcessor { this.vadEnabled = p.vadEnabled ?? false; this.vadPositiveThreshold = p.vadPositiveThreshold ?? 0.5; this.vadNegativeThreshold = p.vadNegativeThreshold ?? 0.3; - if (p.vadAggressive) { + const newAggressive = p.vadAggressive ?? false; + if (newAggressive) { this.vadOpenRampRate = 1.0 / (0.001 * sampleRate); // 1 ms — near-instant this.vadCloseRampRate = 1.0 / (0.005 * sampleRate); // 5 ms } else { this.vadOpenRampRate = 1.0 / (0.005 * sampleRate); // 5 ms this.vadCloseRampRate = 1.0 / (0.02 * sampleRate); // 20 ms } + // Recreate runtime if mode changed (hop size differs between standard/aggressive) + const newHopSize = newAggressive ? 160 : 256; + if (newAggressive !== this.vadAggressive && this.tenVadModule) { + this.tenVadRuntime?.destroy(); + this.tenVadRuntime = null; + this.vadHopCount = 0; + try { + this.tenVadRuntime = new TenVADRuntime(this.tenVadModule, newHopSize, 0.5); + } catch (e) { + this.port.postMessage({ type: "log", msg: "[NoiseGate worklet] TEN-VAD recreate failed: " + String(e) }); + } + } + this.vadAggressive = newAggressive; + this.vadHopSize = newHopSize; // When VAD is disabled, open the gate immediately if (!this.vadEnabled) this.vadGateOpen = true; this.port.postMessage({ @@ -396,7 +414,7 @@ class NoiseGateProcessor extends AudioWorkletProcessor { : (avg * 32767 + 0.5) | 0; this.vadHopBuf[this.vadHopCount++] = s16; - if (this.vadHopCount >= 160) { + if (this.vadHopCount >= this.vadHopSize) { this.vadHopCount = 0; const prob = this.tenVadRuntime.process(this.vadHopBuf); if (!this.vadGateOpen && prob >= this.vadPositiveThreshold) { diff --git a/src/settings/SettingsModal.tsx b/src/settings/SettingsModal.tsx index b107153f..e816aad9 100644 --- a/src/settings/SettingsModal.tsx +++ b/src/settings/SettingsModal.tsx @@ -5,10 +5,19 @@ SPDX-License-Identifier: AGPL-3.0-only OR LicenseRef-Element-Commercial Please see LICENSE in the repository root for full details. */ -import { type ChangeEvent, type FC, type ReactNode, useEffect, useState, useCallback } from "react"; +import { type ChangeEvent, type FC, type ReactNode, useEffect, useId, useState, useCallback } from "react"; import { useTranslation } from "react-i18next"; import { type MatrixClient } from "matrix-js-sdk"; -import { Button, Heading, Root as Form, Separator } from "@vector-im/compound-web"; +import { + Button, + Heading, + HelpMessage, + InlineField, + Label, + RadioControl, + Root as Form, + Separator, +} from "@vector-im/compound-web"; import { type Room as LivekitRoom } from "livekit-client"; import { Modal } from "../Modal"; @@ -134,6 +143,7 @@ export const SettingsModal: FC = ({ const [showAdvancedGate, setShowAdvancedGate] = useState(false); // Voice activity detection + const vadModeRadioGroup = useId(); const [vadActive, setVadActive] = useSetting(vadEnabledSetting); const [vadModeValue, setVadModeValue] = useSetting(vadModeSetting); const [vadPositiveThreshold, setVadPositiveThreshold] = useSetting(vadPositiveThresholdSetting); @@ -347,28 +357,34 @@ export const SettingsModal: FC = ({ {vadActive && ( <> - - setVadModeValue("standard")} - /> - - - setVadModeValue("aggressive")} - /> - +
+ setVadModeValue("standard")} + /> + } + > + + 256 samples / 16 ms — comfortable feel. + + setVadModeValue("aggressive")} + /> + } + > + + 160 samples / 10 ms — lowest possible latency. + +
Open threshold: {Math.round(vadPositiveThresholdRaw * 100)}%

How confident the model must be before opening the gate.