fix: proper radio buttons for VAD mode, standard=16ms/aggressive=10ms

- Use compound-web Form/InlineField/RadioControl/Label/HelpMessage for
  VAD mode selection (proper radio button rendering)
- Standard mode: 256 samples / 16 ms hop + 5 ms open / 20 ms close ramp
- Aggressive mode: 160 samples / 10 ms hop + 1 ms open / 5 ms close ramp
- Worklet stores WebAssembly.Module and recreates TenVADRuntime with the
  correct hop size whenever the mode changes

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
mk
2026-03-24 07:52:52 -03:00
parent e95e613c08
commit 9fc9655dbb
2 changed files with 66 additions and 32 deletions

View File

@@ -218,13 +218,16 @@ class NoiseGateProcessor extends AudioWorkletProcessor {
private vadEnabled = false; private vadEnabled = false;
private vadPositiveThreshold = 0.5; private vadPositiveThreshold = 0.5;
private vadNegativeThreshold = 0.3; private vadNegativeThreshold = 0.3;
private vadAggressive = false;
private tenVadRuntime: TenVADRuntime | null = null; private tenVadRuntime: TenVADRuntime | null = null;
private tenVadModule: WebAssembly.Module | undefined = undefined;
// 3:1 decimation from AudioContext sample rate to 16 kHz // 3:1 decimation from AudioContext sample rate to 16 kHz
private readonly decRatio = Math.max(1, Math.round(sampleRate / 16000)); private readonly decRatio = Math.max(1, Math.round(sampleRate / 16000));
private decPhase = 0; private decPhase = 0;
private decAcc = 0; private decAcc = 0;
// 160-sample hop = 10 ms @ 16 kHz (minimum supported by TEN-VAD) // Buffer sized for max hop (256); vadHopSize tracks how many samples to collect
private readonly vadHopBuf = new Int16Array(160); private readonly vadHopBuf = new Int16Array(256);
private vadHopSize = 256; // standard: 256 (16 ms), aggressive: 160 (10 ms)
private vadHopCount = 0; private vadHopCount = 0;
private logCounter = 0; private logCounter = 0;
@@ -235,13 +238,13 @@ class NoiseGateProcessor extends AudioWorkletProcessor {
super(options); super(options);
// Try to instantiate TEN-VAD from the pre-compiled module passed by the main thread // Try to instantiate TEN-VAD from the pre-compiled module passed by the main thread
const tenVadModule = options?.processorOptions?.tenVadModule as this.tenVadModule = options?.processorOptions?.tenVadModule as
| WebAssembly.Module | WebAssembly.Module
| undefined; | undefined;
if (tenVadModule) { if (this.tenVadModule) {
try { try {
// hopSize = 160 samples @ 16 kHz = 10 ms; threshold = 0.5 (overridden via params) // Default: standard mode — 256 samples @ 16 kHz = 16 ms
this.tenVadRuntime = new TenVADRuntime(tenVadModule, 160, 0.5); this.tenVadRuntime = new TenVADRuntime(this.tenVadModule, 256, 0.5);
this.port.postMessage({ this.port.postMessage({
type: "log", type: "log",
msg: "[NoiseGate worklet] TEN-VAD runtime initialized, decRatio=" + this.decRatio, msg: "[NoiseGate worklet] TEN-VAD runtime initialized, decRatio=" + this.decRatio,
@@ -297,13 +300,28 @@ class NoiseGateProcessor extends AudioWorkletProcessor {
this.vadEnabled = p.vadEnabled ?? false; this.vadEnabled = p.vadEnabled ?? false;
this.vadPositiveThreshold = p.vadPositiveThreshold ?? 0.5; this.vadPositiveThreshold = p.vadPositiveThreshold ?? 0.5;
this.vadNegativeThreshold = p.vadNegativeThreshold ?? 0.3; this.vadNegativeThreshold = p.vadNegativeThreshold ?? 0.3;
if (p.vadAggressive) { const newAggressive = p.vadAggressive ?? false;
if (newAggressive) {
this.vadOpenRampRate = 1.0 / (0.001 * sampleRate); // 1 ms — near-instant this.vadOpenRampRate = 1.0 / (0.001 * sampleRate); // 1 ms — near-instant
this.vadCloseRampRate = 1.0 / (0.005 * sampleRate); // 5 ms this.vadCloseRampRate = 1.0 / (0.005 * sampleRate); // 5 ms
} else { } else {
this.vadOpenRampRate = 1.0 / (0.005 * sampleRate); // 5 ms this.vadOpenRampRate = 1.0 / (0.005 * sampleRate); // 5 ms
this.vadCloseRampRate = 1.0 / (0.02 * sampleRate); // 20 ms this.vadCloseRampRate = 1.0 / (0.02 * sampleRate); // 20 ms
} }
// Recreate runtime if mode changed (hop size differs between standard/aggressive)
const newHopSize = newAggressive ? 160 : 256;
if (newAggressive !== this.vadAggressive && this.tenVadModule) {
this.tenVadRuntime?.destroy();
this.tenVadRuntime = null;
this.vadHopCount = 0;
try {
this.tenVadRuntime = new TenVADRuntime(this.tenVadModule, newHopSize, 0.5);
} catch (e) {
this.port.postMessage({ type: "log", msg: "[NoiseGate worklet] TEN-VAD recreate failed: " + String(e) });
}
}
this.vadAggressive = newAggressive;
this.vadHopSize = newHopSize;
// When VAD is disabled, open the gate immediately // When VAD is disabled, open the gate immediately
if (!this.vadEnabled) this.vadGateOpen = true; if (!this.vadEnabled) this.vadGateOpen = true;
this.port.postMessage({ this.port.postMessage({
@@ -396,7 +414,7 @@ class NoiseGateProcessor extends AudioWorkletProcessor {
: (avg * 32767 + 0.5) | 0; : (avg * 32767 + 0.5) | 0;
this.vadHopBuf[this.vadHopCount++] = s16; this.vadHopBuf[this.vadHopCount++] = s16;
if (this.vadHopCount >= 160) { if (this.vadHopCount >= this.vadHopSize) {
this.vadHopCount = 0; this.vadHopCount = 0;
const prob = this.tenVadRuntime.process(this.vadHopBuf); const prob = this.tenVadRuntime.process(this.vadHopBuf);
if (!this.vadGateOpen && prob >= this.vadPositiveThreshold) { if (!this.vadGateOpen && prob >= this.vadPositiveThreshold) {

View File

@@ -5,10 +5,19 @@ SPDX-License-Identifier: AGPL-3.0-only OR LicenseRef-Element-Commercial
Please see LICENSE in the repository root for full details. Please see LICENSE in the repository root for full details.
*/ */
import { type ChangeEvent, type FC, type ReactNode, useEffect, useState, useCallback } from "react"; import { type ChangeEvent, type FC, type ReactNode, useEffect, useId, useState, useCallback } from "react";
import { useTranslation } from "react-i18next"; import { useTranslation } from "react-i18next";
import { type MatrixClient } from "matrix-js-sdk"; import { type MatrixClient } from "matrix-js-sdk";
import { Button, Heading, Root as Form, Separator } from "@vector-im/compound-web"; import {
Button,
Heading,
HelpMessage,
InlineField,
Label,
RadioControl,
Root as Form,
Separator,
} from "@vector-im/compound-web";
import { type Room as LivekitRoom } from "livekit-client"; import { type Room as LivekitRoom } from "livekit-client";
import { Modal } from "../Modal"; import { Modal } from "../Modal";
@@ -134,6 +143,7 @@ export const SettingsModal: FC<Props> = ({
const [showAdvancedGate, setShowAdvancedGate] = useState(false); const [showAdvancedGate, setShowAdvancedGate] = useState(false);
// Voice activity detection // Voice activity detection
const vadModeRadioGroup = useId();
const [vadActive, setVadActive] = useSetting(vadEnabledSetting); const [vadActive, setVadActive] = useSetting(vadEnabledSetting);
const [vadModeValue, setVadModeValue] = useSetting(vadModeSetting); const [vadModeValue, setVadModeValue] = useSetting(vadModeSetting);
const [vadPositiveThreshold, setVadPositiveThreshold] = useSetting(vadPositiveThresholdSetting); const [vadPositiveThreshold, setVadPositiveThreshold] = useSetting(vadPositiveThresholdSetting);
@@ -347,28 +357,34 @@ export const SettingsModal: FC<Props> = ({
</FieldRow> </FieldRow>
{vadActive && ( {vadActive && (
<> <>
<FieldRow> <Form>
<InputField <InlineField
id="vadModeStandard" name={vadModeRadioGroup}
type="radio" control={
name="vadMode" <RadioControl
label="Standard" checked={vadModeValue === "standard"}
description="5 ms open / 20 ms close ramp — comfortable feel." value="standard"
checked={vadModeValue === "standard"} onChange={(): void => setVadModeValue("standard")}
onChange={(): void => setVadModeValue("standard")} />
/> }
</FieldRow> >
<FieldRow> <Label>Standard</Label>
<InputField <HelpMessage>256 samples / 16 ms comfortable feel.</HelpMessage>
id="vadModeAggressive" </InlineField>
type="radio" <InlineField
name="vadMode" name={vadModeRadioGroup}
label="Aggressive" control={
description="1 ms open / 5 ms close ramp — lowest possible latency." <RadioControl
checked={vadModeValue === "aggressive"} checked={vadModeValue === "aggressive"}
onChange={(): void => setVadModeValue("aggressive")} value="aggressive"
/> onChange={(): void => setVadModeValue("aggressive")}
</FieldRow> />
}
>
<Label>Aggressive</Label>
<HelpMessage>160 samples / 10 ms lowest possible latency.</HelpMessage>
</InlineField>
</Form>
<div className={`${styles.volumeSlider} ${styles.thresholdSlider}`}> <div className={`${styles.volumeSlider} ${styles.thresholdSlider}`}>
<span className={styles.sliderLabel}>Open threshold: {Math.round(vadPositiveThresholdRaw * 100)}%</span> <span className={styles.sliderLabel}>Open threshold: {Math.round(vadPositiveThresholdRaw * 100)}%</span>
<p>How confident the model must be before opening the gate.</p> <p>How confident the model must be before opening the gate.</p>