1 Commits

Author SHA1 Message Date
mk
92f0cf2785 feat: adding ten-vad wasm to project, configuration options on audio
Some checks failed
Build / build_full_element_call (pull_request) Has been cancelled
Build / build_embedded_element_call (pull_request) Has been cancelled
Build / build_sdk_element_call (pull_request) Has been cancelled
Lint, format & type check / Lint, format & type check (pull_request) Has been cancelled
Build & publish embedded packages for releases / Versioning (pull_request) Has been cancelled
Test / Run unit tests (pull_request) Has been cancelled
Test / Run end-to-end tests (pull_request) Has been cancelled
GitHub Actions Security Analysis with zizmor 🌈 / Run zizmor 🌈 (pull_request) Has been cancelled
Prevent blocked / Prevent blocked (pull_request_target) Has been cancelled
Build / deploy_develop (pull_request) Has been cancelled
Build / docker_for_develop (pull_request) Has been cancelled
Build & publish embedded packages for releases / build_element_call (pull_request) Has been cancelled
Build & publish embedded packages for releases / Publish tarball (pull_request) Has been cancelled
Build & publish embedded packages for releases / Publish NPM (pull_request) Has been cancelled
Build & publish embedded packages for releases / Publish Android AAR (pull_request) Has been cancelled
Build & publish embedded packages for releases / Publish SwiftPM Library (pull_request) Has been cancelled
Build & publish embedded packages for releases / Update release notes (pull_request) Has been cancelled
settings
2026-03-28 20:42:50 -03:00
14 changed files with 866 additions and 824 deletions

30
public/vad/ten_vad.js Normal file
View File

@@ -0,0 +1,30 @@
// Emscripten-generated, minified ES-module loader for the TEN-VAD WebAssembly
// binary. Treat it as build output: regenerate it rather than editing by hand.
//
// The default export is a factory. Calling it with an optional Module-style
// options object starts loading/instantiating "ten_vad.wasm" and returns the
// `ready` promise, which resolves with the module object once the runtime has
// been initialised and rejects if instantiation aborts.
//
// Options read from the object include `locateFile`, `wasmBinary`,
// `instantiateWasm`, `print`/`printErr`, `onAbort`, `preInit`, `preRun`,
// `postRun`, `onRuntimeInitialized`, `setStatus` and `monitorRunDependencies`.
// Without `locateFile` the .wasm URL is resolved relative to this file via
// `import.meta.url`.
//
// The resolved module exposes `_malloc`, `_free`, `_ten_vad_create`,
// `_ten_vad_process`, `_ten_vad_destroy`, `_ten_vad_get_version` and the
// `HEAP*` typed-array views over the WASM memory. The WASM imports live in
// namespace `a` (functions `a`..`f`); the WASM exports are the single-letter
// names `g` (memory) through `n`.
var createVADModule = (() => {
var _scriptDir = import.meta.url;
return (
function(createVADModule) {
createVADModule = createVADModule || {};
var a;a||(a=typeof createVADModule !== 'undefined' ? createVADModule : {});var k,l;a.ready=new Promise(function(b,c){k=b;l=c});var p=Object.assign({},a),r="object"==typeof window,u="function"==typeof importScripts,v="",w;
if(r||u)u?v=self.location.href:"undefined"!=typeof document&&document.currentScript&&(v=document.currentScript.src),_scriptDir&&(v=_scriptDir),0!==v.indexOf("blob:")?v=v.substr(0,v.replace(/[?#].*/,"").lastIndexOf("/")+1):v="",u&&(w=b=>{var c=new XMLHttpRequest;c.open("GET",b,!1);c.responseType="arraybuffer";c.send(null);return new Uint8Array(c.response)});var aa=a.print||console.log.bind(console),x=a.printErr||console.warn.bind(console);Object.assign(a,p);p=null;var y;a.wasmBinary&&(y=a.wasmBinary);
var noExitRuntime=a.noExitRuntime||!0;"object"!=typeof WebAssembly&&z("no native wasm support detected");var A,B=!1,C="undefined"!=typeof TextDecoder?new TextDecoder("utf8"):void 0,D,E,F;function J(){var b=A.buffer;D=b;a.HEAP8=new Int8Array(b);a.HEAP16=new Int16Array(b);a.HEAP32=new Int32Array(b);a.HEAPU8=E=new Uint8Array(b);a.HEAPU16=new Uint16Array(b);a.HEAPU32=F=new Uint32Array(b);a.HEAPF32=new Float32Array(b);a.HEAPF64=new Float64Array(b)}var K=[],L=[],M=[];
function ba(){var b=a.preRun.shift();K.unshift(b)}var N=0,O=null,P=null;function z(b){if(a.onAbort)a.onAbort(b);b="Aborted("+b+")";x(b);B=!0;b=new WebAssembly.RuntimeError(b+". Build with -sASSERTIONS for more info.");l(b);throw b;}function Q(){return R.startsWith("data:application/octet-stream;base64,")}var R;if(a.locateFile){if(R="ten_vad.wasm",!Q()){var S=R;R=a.locateFile?a.locateFile(S,v):v+S}}else R=(new URL("ten_vad.wasm",import.meta.url)).href;
function T(){var b=R;try{if(b==R&&y)return new Uint8Array(y);if(w)return w(b);throw"both async and sync fetching of the wasm failed";}catch(c){z(c)}}function ca(){return y||!r&&!u||"function"!=typeof fetch?Promise.resolve().then(function(){return T()}):fetch(R,{credentials:"same-origin"}).then(function(b){if(!b.ok)throw"failed to load wasm binary file at '"+R+"'";return b.arrayBuffer()}).catch(function(){return T()})}function U(b){for(;0<b.length;)b.shift()(a)}
var da=[null,[],[]],ea={a:function(){z("")},f:function(b,c,m){E.copyWithin(b,c,c+m)},c:function(b){var c=E.length;b>>>=0;if(2147483648<b)return!1;for(var m=1;4>=m;m*=2){var h=c*(1+.2/m);h=Math.min(h,b+100663296);var d=Math;h=Math.max(b,h);d=d.min.call(d,2147483648,h+(65536-h%65536)%65536);a:{try{A.grow(d-D.byteLength+65535>>>16);J();var e=1;break a}catch(W){}e=void 0}if(e)return!0}return!1},e:function(){return 52},b:function(){return 70},d:function(b,c,m,h){for(var d=0,e=0;e<m;e++){var W=F[c>>2],
X=F[c+4>>2];c+=8;for(var G=0;G<X;G++){var f=E[W+G],H=da[b];if(0===f||10===f){f=H;for(var n=0,q=n+NaN,t=n;f[t]&&!(t>=q);)++t;if(16<t-n&&f.buffer&&C)f=C.decode(f.subarray(n,t));else{for(q="";n<t;){var g=f[n++];if(g&128){var I=f[n++]&63;if(192==(g&224))q+=String.fromCharCode((g&31)<<6|I);else{var Y=f[n++]&63;g=224==(g&240)?(g&15)<<12|I<<6|Y:(g&7)<<18|I<<12|Y<<6|f[n++]&63;65536>g?q+=String.fromCharCode(g):(g-=65536,q+=String.fromCharCode(55296|g>>10,56320|g&1023))}}else q+=String.fromCharCode(g)}f=q}(1===
b?aa:x)(f);H.length=0}else H.push(f)}d+=X}F[h>>2]=d;return 0}};
(function(){function b(d){a.asm=d.exports;A=a.asm.g;J();L.unshift(a.asm.h);N--;a.monitorRunDependencies&&a.monitorRunDependencies(N);0==N&&(null!==O&&(clearInterval(O),O=null),P&&(d=P,P=null,d()))}function c(d){b(d.instance)}function m(d){return ca().then(function(e){return WebAssembly.instantiate(e,h)}).then(function(e){return e}).then(d,function(e){x("failed to asynchronously prepare wasm: "+e);z(e)})}var h={a:ea};N++;a.monitorRunDependencies&&a.monitorRunDependencies(N);if(a.instantiateWasm)try{return a.instantiateWasm(h,
b)}catch(d){x("Module.instantiateWasm callback failed with error: "+d),l(d)}(function(){return y||"function"!=typeof WebAssembly.instantiateStreaming||Q()||"function"!=typeof fetch?m(c):fetch(R,{credentials:"same-origin"}).then(function(d){return WebAssembly.instantiateStreaming(d,h).then(c,function(e){x("wasm streaming compile failed: "+e);x("falling back to ArrayBuffer instantiation");return m(c)})})})().catch(l);return{}})();
a.___wasm_call_ctors=function(){return(a.___wasm_call_ctors=a.asm.h).apply(null,arguments)};a._malloc=function(){return(a._malloc=a.asm.i).apply(null,arguments)};a._free=function(){return(a._free=a.asm.j).apply(null,arguments)};a._ten_vad_create=function(){return(a._ten_vad_create=a.asm.k).apply(null,arguments)};a._ten_vad_process=function(){return(a._ten_vad_process=a.asm.l).apply(null,arguments)};a._ten_vad_destroy=function(){return(a._ten_vad_destroy=a.asm.m).apply(null,arguments)};
a._ten_vad_get_version=function(){return(a._ten_vad_get_version=a.asm.n).apply(null,arguments)};var V;P=function fa(){V||Z();V||(P=fa)};
function Z(){function b(){if(!V&&(V=!0,a.calledRun=!0,!B)){U(L);k(a);if(a.onRuntimeInitialized)a.onRuntimeInitialized();if(a.postRun)for("function"==typeof a.postRun&&(a.postRun=[a.postRun]);a.postRun.length;){var c=a.postRun.shift();M.unshift(c)}U(M)}}if(!(0<N)){if(a.preRun)for("function"==typeof a.preRun&&(a.preRun=[a.preRun]);a.preRun.length;)ba();U(K);0<N||(a.setStatus?(a.setStatus("Running..."),setTimeout(function(){setTimeout(function(){a.setStatus("")},1);b()},1)):b())}}
if(a.preInit)for("function"==typeof a.preInit&&(a.preInit=[a.preInit]);0<a.preInit.length;)a.preInit.pop()();Z();
return createVADModule.ready
}
);
})();
// Sole export: the module factory described above.
export default createVADModule;

BIN
public/vad/ten_vad.wasm Normal file

Binary file not shown.

View File

@@ -31,7 +31,6 @@ interface Props {
max: number;
step: number;
disabled?: boolean;
tooltip?: boolean;
}
/**
@@ -47,7 +46,6 @@ export const Slider: FC<Props> = ({
max,
step,
disabled,
tooltip = true,
}) => {
const onValueChange = useCallback(
([v]: number[]) => onValueChangeProp(v),
@@ -73,13 +71,9 @@ export const Slider: FC<Props> = ({
<Range className={styles.highlight} />
</Track>
{/* Note: This is expected not to be visible on mobile.*/}
{tooltip ? (
<Tooltip placement="top" label={Math.round(value * 100).toString() + "%"}>
<Thumb className={styles.handle} aria-label={label} />
</Tooltip>
) : (
<Tooltip placement="top" label={Math.round(value * 100).toString() + "%"}>
<Thumb className={styles.handle} aria-label={label} />
)}
</Tooltip>
</Root>
);
};

View File

@@ -74,8 +74,6 @@ export function LivekitRoomAudioRenderer({
)
// Only keep audio tracks
.filter((ref) => ref.publication.kind === Track.Kind.Audio)
// Never render local participant's own audio back to themselves
.filter((ref) => !ref.participant.isLocal)
// Only keep tracks from participants that are in the validIdentities list
.filter((ref) => {
const isValid = validIdentities.includes(ref.participant.identity);

View File

@@ -1,177 +0,0 @@
/*
Copyright 2026 New Vector Ltd.
SPDX-License-Identifier: AGPL-3.0-only OR LicenseRef-Element-Commercial
Please see LICENSE in the repository root for full details.
*/
// Ambient declarations only: these names are not defined in this file and are
// expected to exist as globals where the worklet module is evaluated.

// Sample rate in Hz; used below to convert times into per-sample increments.
declare const sampleRate: number;
// Minimal shape of the worklet base class, limited to the members this file uses.
declare class AudioWorkletProcessor {
// Message channel to the node that owns this processor.
public readonly port: MessagePort;
// Called once per render block; indexed as inputs[input][channel][sample].
public process(
inputs: Float32Array[][],
outputs: Float32Array[][],
parameters: Record<string, Float32Array>,
): boolean;
}
// Registers a processor class under the name an AudioWorkletNode refers to.
declare function registerProcessor(
name: string,
// eslint-disable-next-line @typescript-eslint/no-explicit-any
processorCtor: new (...args: any[]) => AudioWorkletProcessor,
): void;
// Message payload accepted on the processor's port; every field is applied on
// each update. Times are in milliseconds, levels in decibels.
interface NoiseGateParams {
threshold: number; // dBFS — gate opens above this, closes below it
attackMs: number; // time for the gate gain to ramp from 0 to 1 once open
holdMs: number; // time the gate stays at its current gain after closing
releaseMs: number; // time for the gate gain to ramp from 1 to 0 after the hold
transientEnabled: boolean; // turns the transient suppressor on or off
transientThresholdDb: number; // dB above background RMS that triggers suppression
transientReleaseMs: number; // how quickly suppression fades after transient ends
}
/**
 * Converts a level in decibels to a linear amplitude ratio
 * (0 dB maps to 1, every +20 dB multiplies by 10).
 */
function dbToLinear(db: number): number {
  const exponent = db / 20;
  return Math.pow(10, exponent);
}
/**
 * AudioWorkletProcessor combining a noise gate with an optional transient
 * suppressor. Both stages are evaluated once per sample in the same loop and
 * their gains are multiplied together.
 *
 * Noise gate: the gate is open while the instantaneous peak (max absolute
 * sample across channels) is above the threshold. While open the gain ramps up
 * at the attack rate; once closed it waits for the hold time and then ramps
 * down at the release rate.
 *
 * Transient suppressor: keeps an exponentially smoothed RMS of the peak as a
 * background estimate. A peak exceeding that background by the configured
 * ratio drops the suppressor gain to 0 immediately; otherwise the gain climbs
 * back towards 1 at the transient release rate.
 */
class NoiseGateProcessor extends AudioWorkletProcessor {
  // Gate configuration and state. Rates are gain increments per sample.
  private threshold = dbToLinear(-60);
  private attackRate = 1.0 / (0.025 * sampleRate);
  private releaseRate = 1.0 / (0.15 * sampleRate);
  private holdTime = 0.2;
  private isOpen = false;
  private gateAttenuation = 0;
  private heldTime = 0;

  // Transient suppressor configuration and state.
  private transientEnabled = false;
  private transientRatio = dbToLinear(15); // required peak / background ratio
  private transientReleaseRate = 1.0 / (0.08 * sampleRate);
  private transientAttenuation = 1.0; // 1 = no suppression, 0 = fully muted
  private slowRms = 0;
  // Smoothing coefficient of the background RMS (0.2 s time constant)
  private rmsCoeff = Math.exp(-1.0 / (0.2 * sampleRate));

  // Number of process() calls so far; drives the periodic status log.
  private logCounter = 0;

  public constructor() {
    super();
    this.port.onmessage = ({ data }: MessageEvent<NoiseGateParams>): void => {
      this.updateParams(data);
    };
    const defaults: NoiseGateParams = {
      threshold: -60,
      attackMs: 25,
      holdMs: 200,
      releaseMs: 150,
      transientEnabled: false,
      transientThresholdDb: 15,
      transientReleaseMs: 80,
    };
    this.updateParams(defaults);
    this.port.postMessage({
      type: "log",
      msg: `[NoiseGate worklet] constructor called, sampleRate=${sampleRate}`,
    });
  }

  /** Applies a full parameter set and reports it back over the port. */
  private updateParams(p: NoiseGateParams): void {
    // Gain change per sample so that a full 0..1 ramp lasts `ms` milliseconds
    const perSampleStep = (ms: number): number =>
      1.0 / ((ms / 1000) * sampleRate);

    this.threshold = dbToLinear(p.threshold);
    this.attackRate = perSampleStep(p.attackMs);
    this.releaseRate = perSampleStep(p.releaseMs);
    this.holdTime = p.holdMs / 1000;
    this.transientEnabled = p.transientEnabled;
    this.transientRatio = dbToLinear(p.transientThresholdDb);
    this.transientReleaseRate = perSampleStep(p.transientReleaseMs);

    this.port.postMessage({
      type: "log",
      msg: `[NoiseGate worklet] params updated: threshold=${p.threshold} transientEnabled=${p.transientEnabled} transientThresholdDb=${p.transientThresholdDb}`,
    });
  }

  /** Largest absolute sample value at `frame` across the first `channelCount` channels. */
  private peakAt(
    source: Float32Array[],
    channelCount: number,
    frame: number,
  ): number {
    let peak = Math.abs(source[0]?.[frame] ?? 0);
    for (let ch = 1; ch < channelCount; ch++) {
      peak = Math.max(peak, Math.abs(source[ch]?.[frame] ?? 0));
    }
    return peak;
  }

  /** Advances the transient suppressor by one sample and returns its gain. */
  private stepTransient(level: number): number {
    // Exponential moving average of the energy, stored as an RMS value
    const smoothedEnergy =
      this.rmsCoeff * this.slowRms * this.slowRms +
      (1.0 - this.rmsCoeff) * level * level;
    this.slowRms = Math.sqrt(smoothedEnergy);

    // Floor the background so that silence does not make every sample a transient
    const background = Math.max(this.slowRms, 1e-6);
    const isTransient = level > background * this.transientRatio;
    this.transientAttenuation = isTransient
      ? 0.0
      : Math.min(1.0, this.transientAttenuation + this.transientReleaseRate);
    return this.transientAttenuation;
  }

  /** Advances the gate's open/closed state and its gain by one sample. */
  private stepGate(level: number, secondsPerSample: number): void {
    if (level > this.threshold) {
      this.isOpen = true;
    } else if (level <= this.threshold && this.isOpen) {
      // Falling edge: restart the hold timer
      this.heldTime = 0;
      this.isOpen = false;
    }

    if (this.isOpen) {
      this.gateAttenuation = Math.min(
        1.0,
        this.gateAttenuation + this.attackRate,
      );
      return;
    }

    this.heldTime += secondsPerSample;
    if (this.heldTime > this.holdTime) {
      this.gateAttenuation = Math.max(
        0.0,
        this.gateAttenuation - this.releaseRate,
      );
    }
  }

  /**
   * Renders one block: computes the combined gain per sample from input 0 and
   * writes the scaled samples to output 0. Output channels without a matching
   * input channel are fed from input channel 0. Always returns true.
   */
  public process(inputs: Float32Array[][], outputs: Float32Array[][]): boolean {
    const source = inputs[0];
    const sink = outputs[0];
    if (!source || source.length === 0) return true;

    const channelCount = source.length;
    const frames = source[0]?.length ?? 128;
    const secondsPerSample = 1.0 / sampleRate;

    for (let frame = 0; frame < frames; frame++) {
      const level = this.peakAt(source, channelCount, frame);
      const transientGain = this.transientEnabled
        ? this.stepTransient(level)
        : 1.0;
      this.stepGate(level, secondsPerSample);

      const gain = this.gateAttenuation * transientGain;
      for (let ch = 0; ch < sink.length; ch++) {
        const from = source[ch] ?? source[0];
        const to = sink[ch];
        if (from && to) {
          to[frame] = (from[frame] ?? 0) * gain;
        }
      }
    }

    // Post a status line every 375 blocks
    this.logCounter++;
    if (this.logCounter % 375 === 0) {
      this.port.postMessage({
        type: "log",
        msg: `[NoiseGate worklet] gateOpen=${this.isOpen} gateAtten=${this.gateAttenuation.toFixed(3)} transientAtten=${this.transientAttenuation.toFixed(3)} slowRms=${this.slowRms.toFixed(5)}`,
      });
    }
    return true;
  }
}

registerProcessor("noise-gate-processor", NoiseGateProcessor);

View File

@@ -1,127 +0,0 @@
/*
Copyright 2026 New Vector Ltd.
SPDX-License-Identifier: AGPL-3.0-only OR LicenseRef-Element-Commercial
Please see LICENSE in the repository root for full details.
*/
import { type Track } from "livekit-client";
import { logger } from "matrix-js-sdk/lib/logger";
const log = logger.getChild("[NoiseGateTransformer]");
// Settings for the noise gate worklet. The transformer keeps a copy and posts
// the whole object to the worklet whenever it changes.
export interface NoiseGateParams {
threshold: number; // dBFS — gate opens above this, closes below it
attackMs: number; // gate attack time in ms
holdMs: number; // gate hold time in ms
releaseMs: number; // gate release time in ms
transientEnabled: boolean; // enables the transient suppressor
transientThresholdDb: number; // dB above background RMS that triggers suppression
transientReleaseMs: number; // ms for suppression to fade after transient ends
}
/**
* Matches LiveKit's AudioProcessorOptions (experimental API, not publicly
* exported, so we declare it locally based on the type definitions).
*
* Only `track` and `audioContext` are read by the processor in this file.
*/
interface AudioProcessorOptions {
kind: Track.Kind.Audio;
// Source track that is fed into the processing graph.
track: MediaStreamTrack;
// Context that loads the worklet module and owns every node of the graph.
audioContext: AudioContext;
element?: HTMLMediaElement;
}
/**
* Matches LiveKit's TrackProcessor<Track.Kind.Audio> interface.
*/
export interface AudioTrackProcessor {
// Identifier of the processor.
name: string;
// Output track of the processing graph; undefined until init() has completed
// and again after destroy().
processedTrack?: MediaStreamTrack;
// Builds the processing graph for the given track and context.
init(opts: AudioProcessorOptions): Promise<void>;
// Rebuilds the processing graph with new options.
restart(opts: AudioProcessorOptions): Promise<void>;
// Tears the processing graph down.
destroy(): Promise<void>;
}
/**
 * LiveKit audio track processor that runs the OBS-style noise gate inside an
 * AudioWorklet.
 *
 * init() wires sourceNode → workletNode → destinationNode and publishes the
 * destination's audio track as `processedTrack`, which LiveKit swaps into the
 * WebRTC sender via sender.replaceTrack(processedTrack).
 */
export class NoiseGateTransformer implements AudioTrackProcessor {
  public readonly name = "noise-gate";
  public processedTrack?: MediaStreamTrack;

  private workletNode?: AudioWorkletNode;
  private sourceNode?: MediaStreamAudioSourceNode;
  private destinationNode?: MediaStreamAudioDestinationNode;
  // Private copy of the latest parameters; re-sent whenever a worklet is created.
  private params: NoiseGateParams;

  public constructor(params: NoiseGateParams) {
    this.params = { ...params };
  }

  /**
   * Loads the worklet module, creates the processing graph for `opts.track`
   * and exposes the processed track.
   */
  public async init(opts: AudioProcessorOptions): Promise<void> {
    const { track, audioContext } = opts;
    log.info("init() called, audioContext state:", audioContext.state, "params:", this.params);

    // NOTE(review): this URL points at the TypeScript source file; confirm the
    // bundler serves compiled JavaScript for it.
    const workletUrl = new URL(
      "./NoiseGateProcessor.worklet.ts",
      import.meta.url,
    );
    log.info("loading worklet from:", workletUrl.href);
    await audioContext.audioWorklet.addModule(workletUrl);
    log.info("worklet module loaded");

    const worklet = new AudioWorkletNode(audioContext, "noise-gate-processor");
    this.workletNode = worklet;
    // Forward the worklet's log messages to the debug log
    worklet.port.onmessage = (
      event: MessageEvent<{ type: string; msg: string }>,
    ): void => {
      if (event.data?.type === "log") log.debug(event.data.msg);
    };
    this.sendParams();

    const source = audioContext.createMediaStreamSource(
      new MediaStream([track]),
    );
    this.sourceNode = source;
    const destination = audioContext.createMediaStreamDestination();
    this.destinationNode = destination;

    source.connect(worklet);
    worklet.connect(destination);

    const [outputTrack] = destination.stream.getAudioTracks();
    this.processedTrack = outputTrack;
    log.info("graph wired, processedTrack:", this.processedTrack);
  }

  /** Destroys the current graph and builds a new one from `opts`. */
  public async restart(opts: AudioProcessorOptions): Promise<void> {
    await this.destroy();
    await this.init(opts);
  }

  /** Disconnects every node of the graph and forgets all references to it. */
  // eslint-disable-next-line @typescript-eslint/require-await
  public async destroy(): Promise<void> {
    const graph = [this.sourceNode, this.workletNode, this.destinationNode];
    for (const node of graph) {
      node?.disconnect();
    }
    this.sourceNode = undefined;
    this.workletNode = undefined;
    this.destinationNode = undefined;
    this.processedTrack = undefined;
  }

  /** Push updated gate parameters to the running worklet. */
  public updateParams(params: NoiseGateParams): void {
    this.params = { ...params };
    this.sendParams();
  }

  /** Posts the stored parameters to the worklet; does nothing before init(). */
  private sendParams(): void {
    const worklet = this.workletNode;
    if (!worklet) return;
    log.debug("sendParams:", this.params);
    worklet.port.postMessage(this.params);
  }
}

View File

@@ -0,0 +1,391 @@
/*
Copyright 2026 New Vector Ltd.
SPDX-License-Identifier: AGPL-3.0-only OR LicenseRef-Element-Commercial
Please see LICENSE in the repository root for full details.
*/
// Ambient declarations only: these names are not defined in this file and are
// expected to exist as globals where the worklet module is evaluated.

// Sample rate in Hz; used below for ramp rates and the decimation ratio.
declare const sampleRate: number;
// Minimal shape of the worklet base class, limited to the members this file uses.
declare class AudioWorkletProcessor {
// Message channel to the node that owns this processor.
public readonly port: MessagePort;
// `processorOptions` carries the values supplied when the node was created.
public constructor(options?: {
processorOptions?: Record<string, unknown>;
});
// Called once per render block; indexed as inputs[input][channel][sample].
public process(
inputs: Float32Array[][],
outputs: Float32Array[][],
parameters: Record<string, Float32Array>,
): boolean;
}
// Registers a processor class under the name an AudioWorkletNode refers to.
declare function registerProcessor(
name: string,
// eslint-disable-next-line @typescript-eslint/no-explicit-any
processorCtor: new (...args: any[]) => AudioWorkletProcessor,
): void;
// Message payload accepted on the processor's port. Any message that is not a
// VADGateMessage is treated as a TenVadParams update.
interface TenVadParams {
// TEN-VAD params
vadEnabled: boolean; // false bypasses the VAD and forces the gate open
vadPositiveThreshold: number; // open gate when prob >= this (0-1)
vadNegativeThreshold: number; // close gate when prob < this (0-1)
vadMode: "standard" | "aggressive" | "loose"; // selects ramp times and hop size
holdMs: number; // hold time before closing gate (ms); 0 = no hold
}
// Port message that sets the gate state directly, identified by its `type` tag.
interface VADGateMessage {
type: "vad-gate";
open: boolean; // true opens the gate, false closes it
}
/**
 * Thin synchronous wrapper around the TEN-VAD Emscripten WASM module.
 * Instantiated synchronously in the AudioWorklet constructor from a
 * pre-compiled WebAssembly.Module passed via processorOptions.
 *
 * The import object mirrors the six functions (`a`..`f`) that the generated
 * Emscripten loader supplies, minus console output. The single-letter export
 * names (`g`..`m`) are those of the minified build.
 */
class TenVADRuntime {
  private readonly mem: WebAssembly.Memory;
  private readonly freeFn: (ptr: number) => void;
  private readonly processFn: (
    handle: number,
    audioPtr: number,
    hopSize: number,
    probPtr: number,
    flagPtr: number,
  ) => number;
  private readonly destroyFn: (handle: number) => number;
  private readonly handle: number;
  private readonly audioBufPtr: number;
  private readonly probPtr: number;
  private readonly flagPtr: number;
  /** Number of Int16 samples consumed by each process() call. */
  public readonly hopSize: number;

  /**
   * @param module - compiled TEN-VAD module
   * @param hopSize - samples per VAD frame; also sizes the input buffer
   * @param threshold - threshold forwarded to ten_vad_create
   * @throws Error if instantiation, allocation or ten_vad_create fails
   */
  public constructor(
    module: WebAssembly.Module,
    hopSize: number,
    threshold: number,
  ) {
    this.hopSize = hopSize;

    // Late-bound memory reference — the imports below are only called after
    // instantiation, so closing over this is safe.
    const state = { mem: null as WebAssembly.Memory | null };

    const imports = {
      a: {
        // abort
        a: (): never => {
          throw new Error("ten_vad abort");
        },
        // Unsupported file operation: return the same error code (70) as the
        // generated loader instead of reporting success.
        b: (): number => 70,
        // emscripten_resize_heap
        c: (reqBytes: number): number => {
          if (!state.mem) return 0;
          // The requested size is an unsigned 32-bit value
          const wanted = reqBytes >>> 0;
          try {
            const cur = state.mem.buffer.byteLength;
            if (cur >= wanted) return 1;
            state.mem.grow(Math.ceil((wanted - cur) / 65536));
            return 1;
          } catch {
            return 0;
          }
        },
        // fd_write: the output itself is discarded, but the total byte count
        // must still be stored at `pnum` as the generated loader does —
        // otherwise the caller reads an uninitialised "bytes written" value.
        d: (_fd: number, iov: number, iovcnt: number, pnum: number): number => {
          if (!state.mem) return 0;
          const u32 = new Uint32Array(state.mem.buffer);
          let written = 0;
          for (let i = 0; i < iovcnt; i++) {
            // Each iovec is { ptr: u32, len: u32 }
            written += u32[((iov + 8 * i + 4) >>> 2)];
          }
          u32[pnum >>> 2] = written;
          return 0;
        },
        // Unsupported file operation: return the same error code (52) as the
        // generated loader.
        e: (): number => 52,
        // memmove
        f: (dest: number, src: number, len: number): void => {
          if (state.mem) {
            new Uint8Array(state.mem.buffer).copyWithin(dest, src, src + len);
          }
        },
      },
    };

    // Synchronous instantiation — valid in Worker/AudioWorklet global scope
    // eslint-disable-next-line @typescript-eslint/no-explicit-any
    const instance = new WebAssembly.Instance(module, imports as any);
    const asm = instance.exports as {
      g: WebAssembly.Memory; // exported memory
      h: () => void; // __wasm_call_ctors
      i: (n: number) => number; // malloc
      j: (p: number) => void; // free
      k: (handlePtr: number, hopSize: number, threshold: number) => number; // ten_vad_create
      l: (handle: number, audioPtr: number, hopSize: number, probPtr: number, flagPtr: number) => number; // ten_vad_process
      m: (handle: number) => number; // ten_vad_destroy
    };

    state.mem = asm.g;
    this.mem = asm.g;
    this.freeFn = asm.j;
    this.processFn = asm.l;
    this.destroyFn = asm.m;

    // Run Emscripten static constructors
    asm.h();

    // Allocate persistent buffers (malloc is 8-byte aligned, so alignment is fine)
    this.audioBufPtr = asm.i(hopSize * 2); // Int16Array
    this.probPtr = asm.i(4); // float
    this.flagPtr = asm.i(4); // int
    const handlePtrPtr = asm.i(4);
    if (
      this.audioBufPtr === 0 ||
      this.probPtr === 0 ||
      this.flagPtr === 0 ||
      handlePtrPtr === 0
    ) {
      // A null pointer would make every later write land at address 0
      throw new Error("ten_vad malloc failed");
    }

    // Create VAD handle — ten_vad_create(void** handle, int hopSize, float threshold)
    const ret = asm.k(handlePtrPtr, hopSize, threshold);
    if (ret !== 0) throw new Error(`ten_vad_create failed: ${ret}`);
    this.handle = new Int32Array(this.mem.buffer)[handlePtrPtr >> 2];
    asm.j(handlePtrPtr);
  }

  /**
   * Process one hop of Int16 audio. Returns speech probability [0-1].
   *
   * Only the first `hopSize` samples are used: the WASM-side buffer holds
   * exactly `hopSize` samples, so copying a longer array would write past the
   * allocation and corrupt the heap.
   */
  public process(samples: Int16Array): number {
    const hop =
      samples.length > this.hopSize
        ? samples.subarray(0, this.hopSize)
        : samples;
    // Views are recreated on every call because memory growth detaches old buffers
    new Int16Array(this.mem.buffer).set(hop, this.audioBufPtr >> 1);
    this.processFn(
      this.handle,
      this.audioBufPtr,
      this.hopSize,
      this.probPtr,
      this.flagPtr,
    );
    return new Float32Array(this.mem.buffer)[this.probPtr >> 2];
  }

  /** Destroys the VAD handle and frees the persistent buffers. */
  public destroy(): void {
    this.destroyFn(this.handle);
    this.freeFn(this.audioBufPtr);
    this.freeFn(this.probPtr);
    this.freeFn(this.flagPtr);
  }
}
/**
 * AudioWorkletProcessor implementing an in-worklet TEN-VAD gate.
 *
 * Input channel 0 is decimated by `decRatio` (3:1 at 48 kHz → 16 kHz) by
 * averaging, converted to Int16 and collected into hops of `vadHopSize`
 * samples. Each full hop is passed synchronously to the TEN-VAD runtime; the
 * returned speech probability drives `vadGateOpen` with hysteresis and an
 * optional hold. No IPC round-trip is required.
 *
 * The gate gain is ramped per sample with asymmetric rates (standard mode:
 * 5 ms open to minimise speech onset masking, 20 ms close to de-click) and
 * applied to every output channel.
 *
 * Port messages: a `{ type: "vad-gate", open }` message sets the gate state
 * directly; any other message is treated as a TenVadParams update.
 */
class TenVadProcessor extends AudioWorkletProcessor {
  // VAD gate state
  private vadGateOpen = true; // starts open; TEN-VAD closes it on first silent frame
  private vadAttenuation = 1.0;
  // Asymmetric ramp rates (gain change per sample) — recomputed in
  // updateParams from vadMode
  private vadOpenRampRate = 1.0 / (0.005 * sampleRate); // default: 5 ms
  private vadCloseRampRate = 1.0 / (0.02 * sampleRate); // default: 20 ms

  // TEN-VAD state
  private vadEnabled = false;
  private vadPositiveThreshold = 0.5;
  private vadNegativeThreshold = 0.3;
  private holdMs = 0;
  private vadHoldHops = 0; // hold expressed in VAD hops
  private vadHoldCounter = 0; // hops of continuous sub-threshold signal while gate is open
  private tenVadRuntime: TenVADRuntime | null = null;
  private tenVadModule: WebAssembly.Module | undefined = undefined;

  // Decimation from the AudioContext sample rate towards 16 kHz.
  // NOTE: the ratio is rounded to an integer, so the effective rate is only
  // exactly 16 kHz when sampleRate is a multiple of 16000.
  private readonly decRatio = Math.max(1, Math.round(sampleRate / 16000));
  private decPhase = 0;
  private decAcc = 0;

  // Buffer sized for max hop (256); vadHopSize tracks how many samples to collect
  private readonly vadHopBuf = new Int16Array(256);
  private vadHopSize = 256; // standard: 256 (16 ms), aggressive: 160 (10 ms)
  // View over the first vadHopSize samples of vadHopBuf. The runtime's buffer
  // holds exactly one hop, so it must never be handed the whole 256-sample
  // buffer when the hop is shorter.
  private vadHopView: Int16Array = this.vadHopBuf;
  private vadHopCount = 0;

  private logCounter = 0;

  public constructor(options?: {
    processorOptions?: Record<string, unknown>;
  }) {
    super(options);

    // Try to instantiate TEN-VAD from the pre-compiled module passed by the main thread
    this.tenVadModule = options?.processorOptions?.tenVadModule as
      | WebAssembly.Module
      | undefined;
    if (this.tenVadModule) {
      try {
        // Default: standard mode — 256 samples @ 16 kHz = 16 ms
        this.tenVadRuntime = new TenVADRuntime(this.tenVadModule, 256, 0.5);
        this.port.postMessage({
          type: "log",
          msg: "[TenVad worklet] TEN-VAD runtime initialized, decRatio=" + this.decRatio,
        });
      } catch (e) {
        this.port.postMessage({
          type: "log",
          msg: "[TenVad worklet] TEN-VAD init failed: " + String(e),
        });
      }
    }

    this.port.onmessage = (
      e: MessageEvent<TenVadParams | VADGateMessage>,
    ): void => {
      if ((e.data as VADGateMessage).type === "vad-gate") {
        this.vadGateOpen = (e.data as VADGateMessage).open;
      } else {
        this.updateParams(e.data as TenVadParams);
      }
    };

    this.updateParams({
      vadEnabled: false,
      vadPositiveThreshold: 0.5,
      vadNegativeThreshold: 0.3,
      vadMode: "standard",
      holdMs: 0,
    });

    this.port.postMessage({
      type: "log",
      msg: "[TenVad worklet] constructor called, sampleRate=" + sampleRate,
    });
  }

  /**
   * Applies a parameter set: thresholds, hold, ramp rates and hop size.
   * A hop-size change recreates the TEN-VAD runtime. Missing fields fall back
   * to the defaults.
   */
  private updateParams(p: TenVadParams): void {
    this.vadEnabled = p.vadEnabled ?? false;
    this.vadPositiveThreshold = p.vadPositiveThreshold ?? 0.5;
    this.vadNegativeThreshold = p.vadNegativeThreshold ?? 0.3;
    this.holdMs = p.holdMs ?? 0;

    const newMode = p.vadMode ?? "standard";
    if (newMode === "aggressive") {
      this.vadOpenRampRate = 1.0 / (0.001 * sampleRate); // 1 ms
      this.vadCloseRampRate = 1.0 / (0.005 * sampleRate); // 5 ms
    } else if (newMode === "loose") {
      this.vadOpenRampRate = 1.0 / (0.012 * sampleRate); // 12 ms
      this.vadCloseRampRate = 1.0 / (0.032 * sampleRate); // 32 ms
    } else {
      this.vadOpenRampRate = 1.0 / (0.005 * sampleRate); // 5 ms
      this.vadCloseRampRate = 1.0 / (0.02 * sampleRate); // 20 ms
    }

    // Hop size: aggressive=160 (10 ms @ 16 kHz), others=256 (16 ms)
    const newHopSize = newMode === "aggressive" ? 160 : 256;
    if (newHopSize !== this.vadHopSize && this.tenVadModule) {
      this.tenVadRuntime?.destroy();
      this.tenVadRuntime = null;
      // Drop any partially collected hop and decimation remainder
      this.vadHopCount = 0;
      this.decPhase = 0;
      this.decAcc = 0;
      try {
        this.tenVadRuntime = new TenVADRuntime(this.tenVadModule, newHopSize, 0.5);
      } catch (e) {
        // Without a runtime nothing can reopen the gate, so fail open rather
        // than leaving the microphone muted.
        this.vadGateOpen = true;
        this.port.postMessage({ type: "log", msg: "[TenVad worklet] TEN-VAD recreate failed: " + String(e) });
      }
    }
    this.vadHopSize = newHopSize;
    this.vadHopView = this.vadHopBuf.subarray(0, newHopSize);

    // Recompute hold in hops: ceil((holdMs / 1000) * 16000 / vadHopSize)
    this.vadHoldHops = this.holdMs > 0
      ? Math.ceil((this.holdMs / 1000) * 16000 / this.vadHopSize)
      : 0;
    this.vadHoldCounter = 0;

    if (!this.vadEnabled) this.vadGateOpen = true;

    this.port.postMessage({
      type: "log",
      msg: "[TenVad worklet] params updated: vadEnabled=" + p.vadEnabled
        + " vadPos=" + p.vadPositiveThreshold
        + " vadNeg=" + p.vadNegativeThreshold
        + " vadMode=" + newMode
        + " holdMs=" + this.holdMs,
    });
  }

  /** Updates the gate from one hop's speech probability (hysteresis + hold). */
  private applyVadDecision(prob: number): void {
    if (prob >= this.vadPositiveThreshold) {
      // Speech detected — open gate, reset hold counter
      this.vadGateOpen = true;
      this.vadHoldCounter = 0;
      return;
    }
    if (prob >= this.vadNegativeThreshold) {
      // Ambiguous zone — reset hold counter so hold only fires on sustained silence
      this.vadHoldCounter = 0;
      return;
    }
    if (!this.vadGateOpen) return;
    if (this.vadHoldHops === 0) {
      this.vadGateOpen = false;
      return;
    }
    this.vadHoldCounter++;
    if (this.vadHoldCounter >= this.vadHoldHops) {
      this.vadGateOpen = false;
      this.vadHoldCounter = 0;
    }
  }

  /**
   * Renders one block: feeds input channel 0 to the VAD (when enabled and
   * available), ramps the gate gain per sample and writes the scaled input to
   * output 0. Output channels without a matching input channel are fed from
   * input channel 0. Always returns true.
   */
  public process(inputs: Float32Array[][], outputs: Float32Array[][]): boolean {
    const input = inputs[0];
    const output = outputs[0];
    if (!input || input.length === 0) return true;

    const blockSize = input[0]?.length ?? 128;
    const runtime = this.tenVadRuntime;

    for (let i = 0; i < blockSize; i++) {
      // --- TEN-VAD in-worklet processing ---
      // Accumulate raw mono samples with decRatio:1 decimation (48 kHz → 16 kHz).
      // Every vadHopSize decimated samples run the WASM VAD and update vadGateOpen.
      if (runtime !== null && this.vadEnabled) {
        this.decAcc += input[0]?.[i] ?? 0;
        this.decPhase++;
        if (this.decPhase >= this.decRatio) {
          this.decPhase = 0;
          const avg = this.decAcc / this.decRatio;
          this.decAcc = 0;

          // Float32 [-1,1] → Int16 with clamping; round to nearest for both signs
          const s16 =
            avg >= 1.0
              ? 32767
              : avg <= -1.0
                ? -32768
                : Math.round(avg * 32767);
          this.vadHopBuf[this.vadHopCount++] = s16;

          if (this.vadHopCount >= this.vadHopSize) {
            this.vadHopCount = 0;
            // Pass exactly one hop, never the whole 256-sample buffer
            this.applyVadDecision(runtime.process(this.vadHopView));
          }
        }
      }

      // Asymmetric ramp: fast open to minimise speech onset masking,
      // slow close to de-click on silence transitions.
      const vadTarget = this.vadGateOpen ? 1.0 : 0.0;
      if (this.vadAttenuation < vadTarget) {
        this.vadAttenuation = Math.min(
          vadTarget,
          this.vadAttenuation + this.vadOpenRampRate,
        );
      } else if (this.vadAttenuation > vadTarget) {
        this.vadAttenuation = Math.max(
          vadTarget,
          this.vadAttenuation - this.vadCloseRampRate,
        );
      }

      const gain = this.vadAttenuation;

      for (let c = 0; c < output.length; c++) {
        const inCh = input[c] ?? input[0];
        const outCh = output[c];
        if (inCh && outCh) {
          outCh[i] = (inCh[i] ?? 0) * gain;
        }
      }
    }

    // Post a status line every 375 blocks
    this.logCounter++;
    if (this.logCounter % 375 === 0) {
      this.port.postMessage({
        type: "log",
        msg: "[TenVad worklet] vadOpen=" + this.vadGateOpen
          + " vadAtten=" + this.vadAttenuation.toFixed(3),
      });
    }

    return true;
  }
}

registerProcessor("ten-vad-processor", TenVadProcessor);

View File

@@ -0,0 +1,163 @@
/*
Copyright 2026 New Vector Ltd.
SPDX-License-Identifier: AGPL-3.0-only OR LicenseRef-Element-Commercial
Please see LICENSE in the repository root for full details.
*/
import { type Track } from "livekit-client";
import { logger } from "matrix-js-sdk/lib/logger";
// ?worker&url tells Vite to compile the TypeScript worklet and return its URL.
// Without this, Vite copies the .ts file verbatim and the browser rejects it.
import compiledWorkletUrl from "./TenVadProcessor.worklet.ts?worker&url";
const log = logger.getChild("[TenVadTransformer]");
// Settings for the TEN-VAD worklet; the transformer keeps its own copy.
export interface TenVadParams {
// TEN-VAD params — processed entirely inside the AudioWorklet
vadEnabled: boolean;
vadPositiveThreshold: number; // open gate when prob >= this (0-1)
vadNegativeThreshold: number; // close gate when prob < this (0-1); computed by Publisher
vadMode: "standard" | "aggressive" | "loose";
holdMs: number; // hold time before closing gate (ms); 0 = no hold
}
/**
* Matches LiveKit's AudioProcessorOptions (experimental API, not publicly
* exported, so we declare it locally based on the type definitions).
*
* Only `track` and `audioContext` are read by the init() shown in this file.
*/
interface AudioProcessorOptions {
kind: Track.Kind.Audio;
// Source track that is fed into the processing graph.
track: MediaStreamTrack;
// Context that loads the worklet module and owns every node of the graph.
audioContext: AudioContext;
element?: HTMLMediaElement;
}
/**
* Matches LiveKit's TrackProcessor<Track.Kind.Audio> interface.
*/
export interface AudioTrackProcessor {
// Identifier of the processor.
name: string;
// Output track of the processing graph; undefined until init() has completed
// and again after destroy().
processedTrack?: MediaStreamTrack;
// Builds the processing graph for the given track and context.
init(opts: AudioProcessorOptions): Promise<void>;
// Rebuilds the processing graph with new options.
restart(opts: AudioProcessorOptions): Promise<void>;
// Tears the processing graph down.
destroy(): Promise<void>;
}
// Memoised compilation of the TEN-VAD binary: the in-flight or settled promise
// is shared by every caller, so the module is fetched and compiled only once.
let tenVadModulePromise: Promise<WebAssembly.Module> | null = null;

/**
 * Returns a promise for the compiled TEN-VAD WebAssembly module.
 *
 * The first call fetches "/vad/ten_vad.wasm" and compiles it; later calls get
 * the same promise. If fetching or compiling fails the cache is cleared
 * before the error propagates, so the next call starts a fresh attempt.
 *
 * @returns the compiled module
 * @throws (as a rejection) when the response is not OK or compilation fails
 */
function getTenVADModule(): Promise<WebAssembly.Module> {
  if (tenVadModulePromise !== null) return tenVadModulePromise;

  const readBody = (response: Response): Promise<ArrayBuffer> => {
    if (!response.ok) {
      throw new Error(`Failed to fetch ten_vad.wasm: ${response.status}`);
    }
    return response.arrayBuffer();
  };
  const forgetAndRethrow = (reason: unknown): never => {
    // Clear the cache so a retry is possible on next attach
    tenVadModulePromise = null;
    throw reason;
  };

  const pending = fetch("/vad/ten_vad.wasm")
    .then(readBody)
    .then((bytes) => WebAssembly.compile(bytes))
    .catch(forgetAndRethrow);
  tenVadModulePromise = pending;
  return pending;
}
/**
 * LiveKit audio track processor that applies TEN-VAD via AudioWorklet.
 *
 * The TEN-VAD WASM module is fetched once, compiled, and passed to the worklet
 * via processorOptions so it runs synchronously inside the audio thread —
 * no IPC round-trip, ~16 ms VAD latency.
 *
 * Audio graph: sourceNode → workletNode → destinationNode
 * processedTrack is destinationNode.stream.getAudioTracks()[0]
 *
 * Lifecycle: `init()` builds the graph, `destroy()` tears it down and
 * releases the worklet's message port, `restart()` is destroy + init.
 */
export class TenVadTransformer implements AudioTrackProcessor {
  public readonly name = "ten-vad";
  public processedTrack?: MediaStreamTrack;
  private workletNode?: AudioWorkletNode;
  private sourceNode?: MediaStreamAudioSourceNode;
  private destinationNode?: MediaStreamAudioDestinationNode;
  private params: TenVadParams;

  /**
   * @param params Initial gate/VAD parameters. A shallow copy is stored, so
   *   later mutation of the caller's object has no effect.
   */
  public constructor(params: TenVadParams) {
    this.params = { ...params };
  }

  /**
   * Build the processing graph for `opts.track` inside `opts.audioContext`.
   *
   * If the WASM module cannot be loaded the worklet is still created, just
   * without a `tenVadModule` in its processor options (a warning is logged).
   * Rejects if the worklet module itself cannot be added to the context.
   */
  public async init(opts: AudioProcessorOptions): Promise<void> {
    const { track, audioContext } = opts;
    log.info("init() called, audioContext state:", audioContext.state, "params:", this.params);

    // A second init() without an intervening destroy() would overwrite the
    // node fields and orphan the old, still-connected graph. Tear it down first.
    if (this.workletNode || this.sourceNode || this.destinationNode) {
      await this.destroy();
    }

    // Fetch and compile the TEN-VAD WASM module (cached after first call)
    let tenVadModule: WebAssembly.Module | undefined;
    try {
      tenVadModule = await getTenVADModule();
      log.info("TEN-VAD WASM module compiled");
    } catch (e) {
      log.warn("TEN-VAD WASM module unavailable — VAD disabled:", e);
    }

    log.info("loading worklet from:", compiledWorkletUrl);
    await audioContext.audioWorklet.addModule(compiledWorkletUrl);
    log.info("worklet module loaded");

    this.workletNode = new AudioWorkletNode(
      audioContext,
      "ten-vad-processor",
      {
        processorOptions: {
          tenVadModule,
        },
      },
    );
    // Relay log messages posted by the worklet to the main-thread logger.
    this.workletNode.port.onmessage = (
      e: MessageEvent<{ type: string; msg: string }>,
    ): void => {
      if (e.data?.type === "log") log.debug(e.data.msg);
    };
    this.sendParams();

    this.sourceNode = audioContext.createMediaStreamSource(
      new MediaStream([track]),
    );
    this.destinationNode = audioContext.createMediaStreamDestination();
    this.sourceNode.connect(this.workletNode);
    this.workletNode.connect(this.destinationNode);
    this.processedTrack = this.destinationNode.stream.getAudioTracks()[0];
    log.info("graph wired, processedTrack:", this.processedTrack);
  }

  /** Tear the current graph down and build a fresh one for `opts`. */
  public async restart(opts: AudioProcessorOptions): Promise<void> {
    await this.destroy();
    await this.init(opts);
  }

  /**
   * Disconnect every node, release the worklet's message port and forget the
   * processed track. Safe to call when nothing has been initialised.
   */
  // eslint-disable-next-line @typescript-eslint/require-await
  public async destroy(): Promise<void> {
    if (this.workletNode) {
      // Detach the handler and close the port so the discarded node's message
      // channel does not stay open after teardown.
      this.workletNode.port.onmessage = null;
      this.workletNode.port.close();
    }
    this.sourceNode?.disconnect();
    this.workletNode?.disconnect();
    this.destinationNode?.disconnect();
    this.sourceNode = undefined;
    this.workletNode = undefined;
    this.destinationNode = undefined;
    this.processedTrack = undefined;
  }

  /** Push updated gate/VAD parameters to the running worklet. */
  public updateParams(params: TenVadParams): void {
    this.params = { ...params };
    this.sendParams();
  }

  /** Post the stored parameters to the worklet; no-op before init(). */
  private sendParams(): void {
    if (!this.workletNode) return;
    log.debug("sendParams:", this.params);
    this.workletNode.port.postMessage(this.params);
  }
}

View File

@@ -1,87 +0,0 @@
/*
Copyright 2026 New Vector Ltd.
SPDX-License-Identifier: AGPL-3.0-only OR LicenseRef-Element-Commercial
Please see LICENSE in the repository root for full details.
*/
/* Full-width flex row.
   NOTE(review): NoiseLevelSlider.tsx does not reference `styles.wrapper` —
   confirm whether this rule is still used. */
.wrapper {
display: flex;
align-items: center;
position: relative;
width: 100%;
}
/* Applied to the Radix slider `Root` element. */
.slider {
display: flex;
align-items: center;
position: relative;
width: 100%;
}
/* Applied to the Radix `Track`: the pill-shaped bar that hosts the live level
   fill (::before) and the threshold marker (::after). `position: relative`
   anchors those absolutely positioned pseudo-elements and `overflow: hidden`
   clips them to the pill shape. */
.track {
flex-grow: 1;
border-radius: var(--cpd-radius-pill-effect);
height: var(--cpd-space-4x);
outline: var(--cpd-border-width-1) solid var(--cpd-color-border-interactive-primary);
outline-offset: calc(-1 * var(--cpd-border-width-1));
cursor: pointer;
position: relative;
overflow: hidden;
/* Base background */
background: var(--cpd-color-bg-subtle-primary);
}
/* Live mic level fill — its width follows the --mic-level CSS variable
   (0% when the variable is unset). Pointer events are disabled so the fill
   never intercepts clicks meant for the track. */
.track::before {
content: "";
position: absolute;
inset: 0;
width: var(--mic-level, 0%);
background: var(--cpd-color-gray-600, #808080);
opacity: 0.55;
border-radius: var(--cpd-radius-pill-effect);
transition: width 40ms linear;
pointer-events: none;
}
/* Green when mic level is at or above the threshold (data-active="true" is
   set on the track element by the component). */
.track[data-active="true"]::before {
background: var(--cpd-color-green-900, #1a7f4b);
}
/* Threshold marker — a 2px vertical line whose position follows the
   --threshold-pct CSS variable (50% when unset); translateX(-50%) centres the
   line on that position. */
.track::after {
content: "";
position: absolute;
top: 0;
bottom: 0;
left: var(--threshold-pct, 50%);
width: 2px;
background: var(--cpd-color-text-primary);
opacity: 0.6;
pointer-events: none;
transform: translateX(-50%);
}
/* Hide the Radix Range highlight — we use our own visuals */
.range {
display: none;
}
/* Applied to the Radix `Thumb`: the draggable round handle. z-index keeps it
   above the track. */
.handle {
display: block;
block-size: var(--cpd-space-5x);
inline-size: var(--cpd-space-5x);
border-radius: var(--cpd-radius-pill-effect);
background: var(--cpd-color-bg-action-primary-rest);
box-shadow: 0 0 0 2px var(--cpd-color-bg-canvas-default);
cursor: pointer;
transition: background-color ease 0.15s;
z-index: 1;
}
/* Keyboard focus ring for the handle. */
.handle:focus-visible {
outline: 2px solid var(--cpd-color-border-focused);
outline-offset: 2px;
}

View File

@@ -1,139 +0,0 @@
/*
Copyright 2026 New Vector Ltd.
SPDX-License-Identifier: AGPL-3.0-only OR LicenseRef-Element-Commercial
Please see LICENSE in the repository root for full details.
*/
import { Root, Track, Range, Thumb } from "@radix-ui/react-slider";
import { type FC, useCallback, useEffect, useRef } from "react";
import styles from "./NoiseLevelSlider.module.css";
/** Props accepted by NoiseLevelSlider. */
interface Props {
// Accessible name; rendered as the thumb's aria-label.
label: string;
// Current slider value (the threshold), on the same scale as min/max.
value: number;
// Called with the new value whenever the slider value changes.
onValueChange: (value: number) => void;
// Optional; forwarded from the slider's onValueCommit with the committed value.
onValueCommit?: (value: number) => void;
// Lower bound of the slider; also the level that maps to 0% of the track.
min: number;
// Upper bound of the slider; also the level that maps to 100% of the track.
max: number;
// Slider step, passed through to the Radix root.
step: number;
}
/**
 * Threshold slider that shows live microphone input level as a background fill,
 * similar to Discord's input sensitivity control.
 *
 * The fill represents your current mic peak level in real time and turns green
 * once it reaches the threshold. Drag the handle to set the gate threshold —
 * audio below it will be silenced.
 *
 * The component opens its own microphone stream on mount and releases it on
 * unmount. If the microphone is unavailable the level simply stays at 0.
 */
export const NoiseLevelSlider: FC<Props> = ({
  label,
  value,
  onValueChange: onValueChangeProp,
  onValueCommit: onValueCommitProp,
  min,
  max,
  step,
}) => {
  const trackRef = useRef<HTMLSpanElement>(null);
  const animFrameRef = useRef<number>(0);
  const analyserRef = useRef<AnalyserNode | null>(null);
  const streamRef = useRef<MediaStream | null>(null);
  const dataRef = useRef<Float32Array<ArrayBuffer> | null>(null);
  const thresholdPctRef = useRef<number>(0);

  // Start mic monitoring via AnalyserNode
  useEffect(() => {
    let ctx: AudioContext | null = null;
    // Set by the cleanup so a getUserMedia() that resolves after unmount
    // releases the microphone instead of leaking it.
    let disposed = false;

    navigator.mediaDevices
      .getUserMedia({ audio: true, video: false })
      .then((stream) => {
        if (disposed) {
          stream.getTracks().forEach((t) => t.stop());
          return;
        }
        streamRef.current = stream;
        ctx = new AudioContext();
        const source = ctx.createMediaStreamSource(stream);
        const analyser = ctx.createAnalyser();
        analyser.fftSize = 1024;
        analyser.smoothingTimeConstant = 0.6;
        source.connect(analyser);
        analyserRef.current = analyser;
        dataRef.current = new Float32Array(analyser.fftSize) as Float32Array<ArrayBuffer>;
      })
      .catch(() => {
        // Mic not available — live level stays at 0
      });

    return (): void => {
      disposed = true;
      cancelAnimationFrame(animFrameRef.current);
      streamRef.current?.getTracks().forEach((t) => t.stop());
      streamRef.current = null;
      analyserRef.current = null;
      dataRef.current = null;
      void ctx?.close();
    };
  }, []);

  // rAF loop — reads the peak level and updates the CSS variable on the track element
  useEffect(() => {
    const span = max - min;
    const tick = (): void => {
      animFrameRef.current = requestAnimationFrame(tick);
      const analyser = analyserRef.current;
      const data = dataRef.current;
      const track = trackRef.current;
      if (!analyser || !data || !track) return;

      analyser.getFloatTimeDomainData(data);
      // Peak detection — matches the gate's own measurement method
      let peak = 0;
      for (const s of data) peak = Math.max(peak, Math.abs(s));
      const dbfs = peak > 0 ? 20 * Math.log10(peak) : -Infinity;

      // Map dBFS value to slider percentage (same scale as min/max).
      // A degenerate range (max <= min) maps to 0% instead of NaN.
      const clampedDb = Math.max(min, Math.min(max, dbfs));
      const levelPct = span > 0 ? ((clampedDb - min) / span) * 100 : 0;
      track.style.setProperty("--mic-level", `${levelPct.toFixed(1)}%`);
      track.dataset.active = String(levelPct >= thresholdPctRef.current);
    };
    animFrameRef.current = requestAnimationFrame(tick);
    return (): void => cancelAnimationFrame(animFrameRef.current);
  }, [min, max]);

  // Keep threshold marker in sync with slider value
  useEffect(() => {
    const track = trackRef.current;
    if (!track) return;
    const span = max - min;
    const thresholdPct = span > 0 ? ((value - min) / span) * 100 : 0;
    thresholdPctRef.current = thresholdPct;
    track.style.setProperty("--threshold-pct", `${thresholdPct.toFixed(1)}%`);
  }, [value, min, max]);

  // Radix reports values as arrays (one entry per thumb); unwrap the single thumb.
  const onValueChange = useCallback(
    ([v]: number[]) => onValueChangeProp(v),
    [onValueChangeProp],
  );
  const onValueCommit = useCallback(
    ([v]: number[]) => onValueCommitProp?.(v),
    [onValueCommitProp],
  );

  return (
    <Root
      className={styles.slider}
      value={[value]}
      onValueChange={onValueChange}
      onValueCommit={onValueCommit}
      min={min}
      max={max}
      step={step}
    >
      <Track className={styles.track} ref={trackRef}>
        <Range className={styles.range} />
      </Track>
      <Thumb className={styles.handle} aria-label={label} />
    </Root>
  );
};

View File

@@ -35,37 +35,30 @@ Please see LICENSE in the repository root for full details.
color: var(--cpd-color-text-secondary);
}
.noiseGateSection {
.vadSection {
margin-block-start: var(--cpd-space-6x);
}
.noiseGateHeading {
.vadHeading {
color: var(--cpd-color-text-secondary);
margin-block: var(--cpd-space-3x) 0;
}
.thresholdSlider {
margin-block-start: calc(-32px + var(--cpd-space-2x));
}
.noiseGateSeparator {
.vadSeparator {
margin-block: 6px var(--cpd-space-4x);
}
.advancedGate {
margin-top: var(--cpd-space-3x);
.vadRampLabel {
display: block;
margin-block: var(--cpd-space-6x) var(--cpd-space-1x);
}
.advancedGateToggle {
all: unset;
cursor: pointer;
font: var(--cpd-font-body-sm-semibold);
color: var(--cpd-color-text-secondary);
user-select: none;
.vadRampForm {
margin-top: 0;
}
.advancedGateToggle:hover {
color: var(--cpd-color-text-primary);
.vadSpacedSlider {
margin-block-start: var(--cpd-space-6x);
}
.restoreDefaults {

View File

@@ -5,10 +5,19 @@ SPDX-License-Identifier: AGPL-3.0-only OR LicenseRef-Element-Commercial
Please see LICENSE in the repository root for full details.
*/
import { type ChangeEvent, type FC, type ReactNode, useEffect, useState, useCallback } from "react";
import { type FC, type ReactNode, useEffect, useId, useState } from "react";
import { useTranslation } from "react-i18next";
import { type MatrixClient } from "matrix-js-sdk";
import { Button, Heading, Root as Form, Separator } from "@vector-im/compound-web";
import {
Button,
Heading,
HelpMessage,
InlineField,
Label,
RadioControl,
Root as Form,
Separator,
} from "@vector-im/compound-web";
import { type Room as LivekitRoom } from "livekit-client";
import { Modal } from "../Modal";
@@ -24,18 +33,16 @@ import {
soundEffectVolume as soundEffectVolumeSetting,
backgroundBlur as backgroundBlurSetting,
developerMode,
noiseGateEnabled as noiseGateEnabledSetting,
noiseGateThreshold as noiseGateThresholdSetting,
noiseGateAttack as noiseGateAttackSetting,
noiseGateHold as noiseGateHoldSetting,
noiseGateRelease as noiseGateReleaseSetting,
transientSuppressorEnabled as transientSuppressorEnabledSetting,
transientThreshold as transientThresholdSetting,
transientRelease as transientReleaseSetting,
vadEnabled as vadEnabledSetting,
vadPositiveThreshold as vadPositiveThresholdSetting,
vadMode as vadModeSetting,
vadAdvancedEnabled as vadAdvancedEnabledSetting,
vadAdvancedOpenThreshold as vadAdvancedOpenThresholdSetting,
vadAdvancedCloseThreshold as vadAdvancedCloseThresholdSetting,
vadHoldTime as vadHoldTimeSetting,
} from "./settings";
import { PreferencesSettingsTab } from "./PreferencesSettingsTab";
import { Slider } from "../Slider";
import { NoiseLevelSlider } from "./NoiseLevelSlider";
import { DeviceSelection } from "./DeviceSelection";
import { useTrackProcessor } from "../livekit/TrackProcessorContext";
import { DeveloperSettingsTab } from "./DeveloperSettingsTab";
@@ -116,41 +123,25 @@ export const SettingsModal: FC<Props> = ({
const [soundVolumeRaw, setSoundVolumeRaw] = useState(soundVolume);
const [showDeveloperSettingsTab] = useSetting(developerMode);
// Noise gate settings
const [noiseGateEnabled, setNoiseGateEnabled] = useSetting(noiseGateEnabledSetting);
const [noiseGateThreshold, setNoiseGateThreshold] = useSetting(noiseGateThresholdSetting);
const [noiseGateThresholdRaw, setNoiseGateThresholdRaw] = useState(noiseGateThreshold);
const [noiseGateAttack, setNoiseGateAttack] = useSetting(noiseGateAttackSetting);
const [noiseGateAttackRaw, setNoiseGateAttackRaw] = useState(noiseGateAttack);
const [noiseGateHold, setNoiseGateHold] = useSetting(noiseGateHoldSetting);
const [noiseGateHoldRaw, setNoiseGateHoldRaw] = useState(noiseGateHold);
const [noiseGateRelease, setNoiseGateRelease] = useSetting(noiseGateReleaseSetting);
const [noiseGateReleaseRaw, setNoiseGateReleaseRaw] = useState(noiseGateRelease);
const [showAdvancedGate, setShowAdvancedGate] = useState(false);
// Transient suppressor settings
const [transientEnabled, setTransientEnabled] = useSetting(transientSuppressorEnabledSetting);
const [transientThreshold, setTransientThreshold] = useSetting(transientThresholdSetting);
const [transientThresholdRaw, setTransientThresholdRaw] = useState(transientThreshold);
const [transientRelease, setTransientRelease] = useSetting(transientReleaseSetting);
const [transientReleaseRaw, setTransientReleaseRaw] = useState(transientRelease);
const resetTransientDefaults = useCallback((): void => {
const t = transientThresholdSetting.defaultValue;
const r = transientReleaseSetting.defaultValue;
setTransientThreshold(t); setTransientThresholdRaw(t);
setTransientRelease(r); setTransientReleaseRaw(r);
}, [setTransientThreshold, setTransientRelease]);
const resetGateDefaults = useCallback((): void => {
const a = noiseGateAttackSetting.defaultValue;
const h = noiseGateHoldSetting.defaultValue;
const r = noiseGateReleaseSetting.defaultValue;
setNoiseGateAttack(a); setNoiseGateAttackRaw(a);
setNoiseGateHold(h); setNoiseGateHoldRaw(h);
setNoiseGateRelease(r); setNoiseGateReleaseRaw(r);
}, [setNoiseGateAttack, setNoiseGateHold, setNoiseGateRelease]);
// Voice activity detection
const vadStateGroup = useId();
const vadModeRadioGroup = useId();
const [vadActive, setVadActive] = useSetting(vadEnabledSetting);
const [vadSensitivity, setVadSensitivity] = useSetting(vadPositiveThresholdSetting);
const [vadSensitivityRaw, setVadSensitivityRaw] = useState(vadSensitivity);
const [vadAdvanced, setVadAdvanced] = useSetting(vadAdvancedEnabledSetting);
const vadState = !vadActive ? "disabled" : vadAdvanced ? "advanced" : "simple";
const setVadState = (s: "disabled" | "simple" | "advanced"): void => {
setVadActive(s !== "disabled");
setVadAdvanced(s === "advanced");
};
const [vadModeValue, setVadModeValue] = useSetting(vadModeSetting);
const [vadAdvOpen, setVadAdvOpen] = useSetting(vadAdvancedOpenThresholdSetting);
const [vadAdvOpenRaw, setVadAdvOpenRaw] = useState(vadAdvOpen);
const [vadAdvClose, setVadAdvClose] = useSetting(vadAdvancedCloseThresholdSetting);
const [vadAdvCloseRaw, setVadAdvCloseRaw] = useState(vadAdvClose);
const [vadHold, setVadHold] = useSetting(vadHoldTimeSetting);
const [vadHoldRaw, setVadHoldRaw] = useState(vadHold);
const { available: isRageshakeAvailable } = useSubmitRageshake();
@@ -210,164 +201,185 @@ export const SettingsModal: FC<Props> = ({
/>
</div>
</Form>
<div className={styles.noiseGateSection}>
<div className={styles.vadSection}>
<Heading
type="body"
weight="semibold"
size="sm"
as="h4"
className={styles.noiseGateHeading}
className={styles.vadHeading}
>
Noise Gate
Voice Activity Detection
</Heading>
<Separator className={styles.noiseGateSeparator} />
<FieldRow>
<InputField
id="noiseGateEnabled"
type="checkbox"
label="Enable noise gate"
description="Suppress audio below a configurable threshold."
checked={noiseGateEnabled}
onChange={(e: ChangeEvent<HTMLInputElement>): void =>
setNoiseGateEnabled(e.target.checked)
}
/>
</FieldRow>
{noiseGateEnabled && (
<>
<div className={`${styles.volumeSlider} ${styles.thresholdSlider}`}>
<span className={styles.sliderLabel}>Threshold</span>
<p>Gate opens above this level, closes below it.</p>
<NoiseLevelSlider
label="Noise gate threshold"
value={noiseGateThresholdRaw}
onValueChange={setNoiseGateThresholdRaw}
onValueCommit={setNoiseGateThreshold}
min={-100}
max={0}
step={1}
/>
</div>
<div className={styles.advancedGate}>
<button
className={styles.advancedGateToggle}
onClick={(): void => setShowAdvancedGate((v) => !v)}
>
{showAdvancedGate ? "▾" : "▸"} Advanced settings
</button>
{showAdvancedGate && (
<Separator className={styles.vadSeparator} />
<Form>
<InlineField
name={vadStateGroup}
control={
<RadioControl
checked={vadState === "disabled"}
value="disabled"
onChange={(): void => setVadState("disabled")}
/>
}
>
<Label>Disabled</Label>
</InlineField>
<InlineField
name={vadStateGroup}
control={
<RadioControl
checked={vadState === "simple"}
value="simple"
onChange={(): void => setVadState("simple")}
/>
}
>
<Label>Simple</Label>
</InlineField>
<InlineField
name={vadStateGroup}
control={
<RadioControl
checked={vadState === "advanced"}
value="advanced"
onChange={(): void => setVadState("advanced")}
/>
}
>
<Label>Advanced</Label>
</InlineField>
</Form>
{vadState !== "disabled" && (
<>
{vadState === "simple" && (
<div className={styles.volumeSlider}>
<span className={styles.sliderLabel}>
Sensitivity: {Math.round(vadSensitivityRaw * 100)}%
</span>
<p>Higher values require more confident speech detection before opening.</p>
<Slider
label="VAD sensitivity"
value={vadSensitivityRaw}
onValueChange={setVadSensitivityRaw}
onValueCommit={setVadSensitivity}
min={0.1}
max={1.0}
step={0.05}
/>
</div>
)}
{vadState === "advanced" && (
<>
<div className={styles.volumeSlider}>
<label>Attack: {noiseGateAttackRaw} ms</label>
<p>How quickly the gate opens when signal exceeds threshold.</p>
<span className={styles.vadRampLabel}>Ramp profiles</span>
<Form className={styles.vadRampForm}>
<InlineField
name={vadModeRadioGroup}
control={
<RadioControl
checked={vadModeValue === "loose"}
value="loose"
onChange={(): void => setVadModeValue("loose")}
/>
}
>
<Label>Loose</Label>
<HelpMessage>256 samples / 16 ms 12 ms open / 32 ms close ramp.</HelpMessage>
</InlineField>
<InlineField
name={vadModeRadioGroup}
control={
<RadioControl
checked={vadModeValue === "standard"}
value="standard"
onChange={(): void => setVadModeValue("standard")}
/>
}
>
<Label>Standard</Label>
<HelpMessage>256 samples / 16 ms 5 ms open / 20 ms close ramp.</HelpMessage>
</InlineField>
<InlineField
name={vadModeRadioGroup}
control={
<RadioControl
checked={vadModeValue === "aggressive"}
value="aggressive"
onChange={(): void => setVadModeValue("aggressive")}
/>
}
>
<Label>Aggressive</Label>
<HelpMessage>160 samples / 10 ms 1 ms open / 5 ms close ramp.</HelpMessage>
</InlineField>
</Form>
<div className={`${styles.volumeSlider} ${styles.vadSpacedSlider}`}>
<span className={styles.sliderLabel}>
Open threshold: {Math.round(vadAdvOpenRaw * 100)}%
</span>
<p>Minimum confidence required to open the gate.</p>
<Slider
label="Noise gate attack"
value={noiseGateAttackRaw}
onValueChange={setNoiseGateAttackRaw}
onValueCommit={setNoiseGateAttack}
min={1}
max={100}
step={1}
tooltip={false}
label="VAD open threshold"
value={vadAdvOpenRaw}
onValueChange={setVadAdvOpenRaw}
onValueCommit={setVadAdvOpen}
min={0.1}
max={0.95}
step={0.05}
/>
</div>
<div className={styles.volumeSlider}>
<label>Hold: {noiseGateHoldRaw} ms</label>
<p>How long the gate stays open after signal drops below threshold.</p>
<span className={styles.sliderLabel}>
Close threshold: {Math.round(vadAdvCloseRaw * 100)}%
</span>
<p>Probability must drop below this to start the hold/close sequence.</p>
<Slider
label="Noise gate hold"
value={noiseGateHoldRaw}
onValueChange={setNoiseGateHoldRaw}
onValueCommit={setNoiseGateHold}
label="VAD close threshold"
value={vadAdvCloseRaw}
onValueChange={setVadAdvCloseRaw}
onValueCommit={setVadAdvClose}
min={0.05}
max={0.9}
step={0.05}
/>
</div>
<div className={`${styles.volumeSlider} ${styles.vadSpacedSlider}`}>
<span className={styles.sliderLabel}>
Hold time: {vadHoldRaw} ms
</span>
<p>How long to keep the gate open after speech drops below the close threshold.</p>
<Slider
label="VAD hold time"
value={vadHoldRaw}
onValueChange={setVadHoldRaw}
onValueCommit={setVadHold}
min={0}
max={500}
step={10}
tooltip={false}
/>
</div>
<div className={styles.volumeSlider}>
<label>Release: {noiseGateReleaseRaw} ms</label>
<p>How quickly the gate closes after hold expires.</p>
<Slider
label="Noise gate release"
value={noiseGateReleaseRaw}
onValueChange={setNoiseGateReleaseRaw}
onValueCommit={setNoiseGateRelease}
min={10}
max={500}
step={10}
tooltip={false}
max={2000}
step={50}
/>
</div>
<div className={styles.restoreDefaults}>
<Button kind="secondary" size="sm" onClick={resetGateDefaults}>
<Button
kind="secondary"
size="sm"
onClick={(): void => {
const defOpen = vadAdvancedOpenThresholdSetting.defaultValue;
const defClose = vadAdvancedCloseThresholdSetting.defaultValue;
const defHold = vadHoldTimeSetting.defaultValue;
setVadAdvOpen(defOpen); setVadAdvOpenRaw(defOpen);
setVadAdvClose(defClose); setVadAdvCloseRaw(defClose);
setVadHold(defHold); setVadHoldRaw(defHold);
setVadModeValue("standard");
}}
>
Restore defaults
</Button>
</div>
</>
)}
</div>
</>
)}
</div>
<div className={styles.noiseGateSection}>
<Heading
type="body"
weight="semibold"
size="sm"
as="h4"
className={styles.noiseGateHeading}
>
Transient Suppressor
</Heading>
<Separator className={styles.noiseGateSeparator} />
<FieldRow>
<InputField
id="transientEnabled"
type="checkbox"
label="Enable transient suppressor"
description="Cut sudden loud impacts like desk hits or mic bumps."
checked={transientEnabled}
onChange={(e: ChangeEvent<HTMLInputElement>): void =>
setTransientEnabled(e.target.checked)
}
/>
</FieldRow>
{transientEnabled && (
<>
<div className={`${styles.volumeSlider} ${styles.thresholdSlider}`}>
<span className={styles.sliderLabel}>Sensitivity: {transientThresholdRaw} dB above background</span>
<p>Lower values catch more impacts; higher values only catch the loudest ones.</p>
<Slider
label="Transient threshold"
value={transientThresholdRaw}
onValueChange={setTransientThresholdRaw}
onValueCommit={setTransientThreshold}
min={8}
max={30}
step={1}
tooltip={false}
/>
</div>
<div className={styles.volumeSlider}>
<span className={styles.sliderLabel}>Release: {transientReleaseRaw} ms</span>
<p>How quickly audio returns after suppression.</p>
<Slider
label="Transient release"
value={transientReleaseRaw}
onValueChange={setTransientReleaseRaw}
onValueCommit={setTransientRelease}
min={20}
max={200}
step={10}
tooltip={false}
/>
</div>
<div className={styles.restoreDefaults}>
<Button kind="secondary" size="sm" onClick={resetTransientDefaults}>
Restore defaults
</Button>
</div>
</>
)}
</div>

View File

@@ -129,36 +129,16 @@ export const alwaysShowIphoneEarpiece = new Setting<boolean>(
false,
);
export const noiseGateEnabled = new Setting<boolean>(
"noise-gate-enabled",
false,
);
// Threshold in dBFS — gate opens above this, closes below it
export const noiseGateThreshold = new Setting<number>(
"noise-gate-threshold",
-60,
);
// Time in ms for the gate to fully open after signal exceeds threshold
export const noiseGateAttack = new Setting<number>("noise-gate-attack", 25);
// Time in ms the gate stays open after signal drops below threshold
export const noiseGateHold = new Setting<number>("noise-gate-hold", 200);
// Time in ms for the gate to fully close after hold expires
export const noiseGateRelease = new Setting<number>("noise-gate-release", 150);
export const transientSuppressorEnabled = new Setting<boolean>(
"transient-suppressor-enabled",
false,
);
// How many dB above the background RMS a peak must be to trigger suppression
export const transientThreshold = new Setting<number>(
"transient-suppressor-threshold",
15,
);
// Time in ms for suppression to fade after transient ends
export const transientRelease = new Setting<number>(
"transient-suppressor-release",
80,
);
export const vadEnabled = new Setting<boolean>("vad-enabled", false);
// Simple mode: single sensitivity slider (open threshold); close = open - 0.1
export const vadPositiveThreshold = new Setting<number>("vad-positive-threshold", 0.7);
// standard: 5ms/20ms aggressive: 1ms/5ms loose: 12ms/32ms
export const vadMode = new Setting<"standard" | "aggressive" | "loose">("vad-mode", "standard");
// Advanced settings (override simple mode when enabled)
export const vadAdvancedEnabled = new Setting<boolean>("vad-advanced-enabled", false);
export const vadAdvancedOpenThreshold = new Setting<number>("vad-advanced-open-threshold", 0.7);
export const vadAdvancedCloseThreshold = new Setting<number>("vad-advanced-close-threshold", 0.6);
export const vadHoldTime = new Setting<number>("vad-hold-time", 300);
export enum MatrixRTCMode {
Legacy = "legacy",

View File

@@ -33,19 +33,18 @@ import {
} from "../../../livekit/TrackProcessorContext.tsx";
import { getUrlParams } from "../../../UrlParams.ts";
import {
noiseGateEnabled,
noiseGateThreshold,
noiseGateAttack,
noiseGateHold,
noiseGateRelease,
transientSuppressorEnabled,
transientThreshold,
transientRelease,
vadEnabled,
vadPositiveThreshold,
vadMode,
vadAdvancedEnabled,
vadAdvancedOpenThreshold,
vadAdvancedCloseThreshold,
vadHoldTime,
} from "../../../settings/settings.ts";
import {
type NoiseGateParams,
NoiseGateTransformer,
} from "../../../livekit/NoiseGateTransformer.ts";
type TenVadParams,
TenVadTransformer,
} from "../../../livekit/TenVadTransformer.ts";
import { observeTrackReference$ } from "../../observeTrackReference";
import { type Connection } from "../remoteMembers/Connection.ts";
import { ObservableScope } from "../../ObservableScope.ts";
@@ -90,7 +89,7 @@ export class Publisher {
// Setup track processor syncing (blur)
this.observeTrackProcessors(this.scope, room, trackerProcessorState$);
// Setup noise gate on the local microphone track
this.applyNoiseGate(this.scope, room);
this.applyTenVad(this.scope, room);
// Observe media device changes and update LiveKit active devices accordingly
this.observeMediaDevices(this.scope, devices, controlledAudioDevices);
@@ -418,7 +417,7 @@ export class Publisher {
});
}
private applyNoiseGate(scope: ObservableScope, room: LivekitRoom): void {
private applyTenVad(scope: ObservableScope, room: LivekitRoom): void {
// Observe the local microphone track
const audioTrack$ = scope.behavior(
observeTrackReference$(
@@ -433,72 +432,85 @@ export class Publisher {
null,
);
let transformer: NoiseGateTransformer | null = null;
let transformer: TenVadTransformer | null = null;
let audioCtx: AudioContext | null = null;
const currentParams = (): NoiseGateParams => ({
threshold: noiseGateThreshold.getValue(),
attackMs: noiseGateAttack.getValue(),
holdMs: noiseGateHold.getValue(),
releaseMs: noiseGateRelease.getValue(),
transientEnabled: transientSuppressorEnabled.getValue(),
transientThresholdDb: transientThreshold.getValue(),
transientReleaseMs: transientRelease.getValue(),
});
const currentParams = (): TenVadParams => {
const isAdvanced = vadAdvancedEnabled.getValue();
if (isAdvanced) {
return {
vadEnabled: vadEnabled.getValue(),
vadPositiveThreshold: vadAdvancedOpenThreshold.getValue(),
vadNegativeThreshold: vadAdvancedCloseThreshold.getValue(),
vadMode: vadMode.getValue(),
holdMs: vadHoldTime.getValue(),
};
}
const openT = vadPositiveThreshold.getValue();
return {
vadEnabled: vadEnabled.getValue(),
vadPositiveThreshold: openT,
vadNegativeThreshold: Math.max(0, openT - 0.1),
vadMode: "standard",
holdMs: 0,
};
};
// Attach / detach processor when enabled state or the track changes.
combineLatest([audioTrack$, noiseGateEnabled.value$])
// Attach / detach processor when VAD is toggled or the track changes.
combineLatest([audioTrack$, vadEnabled.value$])
.pipe(scope.bind())
.subscribe(([audioTrack, enabled]) => {
.subscribe(([audioTrack, vadActive]) => {
if (!audioTrack) return;
if (enabled && !audioTrack.getProcessor()) {
const shouldAttach = vadActive;
if (shouldAttach && !audioTrack.getProcessor()) {
const params = currentParams();
this.logger.info("[NoiseGate] attaching processor, params:", params);
transformer = new NoiseGateTransformer(params);
this.logger.info("[TenVad] attaching processor, params:", params);
transformer = new TenVadTransformer(params);
audioCtx = new AudioContext();
this.logger.info("[NoiseGate] AudioContext state before resume:", audioCtx.state);
this.logger.info("[TenVad] AudioContext state before resume:", audioCtx.state);
// eslint-disable-next-line @typescript-eslint/no-explicit-any
(audioTrack as any).setAudioContext(audioCtx);
audioCtx.resume().then(() => {
this.logger.info("[NoiseGate] AudioContext state after resume:", audioCtx?.state);
return audioTrack
// eslint-disable-next-line @typescript-eslint/no-explicit-any
.setProcessor(transformer as any);
audioCtx.resume().then(async () => {
this.logger.info("[TenVad] AudioContext state after resume:", audioCtx?.state);
// eslint-disable-next-line @typescript-eslint/no-explicit-any
return audioTrack.setProcessor(transformer as any);
}).then(() => {
this.logger.info("[NoiseGate] setProcessor resolved");
this.logger.info("[TenVad] setProcessor resolved");
}).catch((e: unknown) => {
this.logger.error("[NoiseGate] setProcessor failed", e);
this.logger.error("[TenVad] setProcessor failed", e);
});
} else if (!enabled && audioTrack.getProcessor()) {
this.logger.info("[NoiseGate] removing processor");
} else if (!shouldAttach && audioTrack.getProcessor()) {
this.logger.info("[TenVad] removing processor");
void audioTrack.stopProcessor();
void audioCtx?.close();
audioCtx = null;
transformer = null;
// eslint-disable-next-line @typescript-eslint/no-explicit-any
(audioTrack as any).setAudioContext(undefined);
} else if (shouldAttach && audioTrack.getProcessor()) {
// Processor already attached — push updated params (e.g. vadActive toggled)
transformer?.updateParams(currentParams());
} else {
this.logger.info("[NoiseGate] tick — enabled:", enabled, "hasProcessor:", !!audioTrack.getProcessor());
this.logger.info(
"[TenVad] tick — vadActive:", vadActive,
"hasProcessor:", !!audioTrack.getProcessor(),
);
}
});
// Push param changes to the live worklet without recreating the processor.
// Push VAD param changes to the live worklet.
combineLatest([
noiseGateThreshold.value$,
noiseGateAttack.value$,
noiseGateHold.value$,
noiseGateRelease.value$,
transientSuppressorEnabled.value$,
transientThreshold.value$,
transientRelease.value$,
vadEnabled.value$,
vadPositiveThreshold.value$,
vadMode.value$,
vadAdvancedEnabled.value$,
vadAdvancedOpenThreshold.value$,
vadAdvancedCloseThreshold.value$,
vadHoldTime.value$,
])
.pipe(scope.bind())
.subscribe(([threshold, attackMs, holdMs, releaseMs,
transientEnabled, transientThresholdDb, transientReleaseMs]) => {
transformer?.updateParams({
threshold, attackMs, holdMs, releaseMs,
transientEnabled, transientThresholdDb, transientReleaseMs,
});
.subscribe(() => {
transformer?.updateParams(currentParams());
});
}
@@ -518,5 +530,4 @@ export class Publisher {
);
trackProcessorSync(scope, track$, trackerProcessorState$);
}
}