feat: adding TEN-VAD wasm to project and configuration options on audio
Some checks failed
Build / build_full_element_call (pull_request) Has been cancelled
Build / build_embedded_element_call (pull_request) Has been cancelled
Build / build_sdk_element_call (pull_request) Has been cancelled
Lint, format & type check / Lint, format & type check (pull_request) Has been cancelled
Build & publish embedded packages for releases / Versioning (pull_request) Has been cancelled
Test / Run unit tests (pull_request) Has been cancelled
Test / Run end-to-end tests (pull_request) Has been cancelled
GitHub Actions Security Analysis with zizmor 🌈 / Run zizmor 🌈 (pull_request) Has been cancelled
Prevent blocked / Prevent blocked (pull_request_target) Has been cancelled
PR changelog label / pr-changelog-label (pull_request_target) Has been cancelled
Build / deploy_develop (pull_request) Has been cancelled
Build / docker_for_develop (pull_request) Has been cancelled
Build & publish embedded packages for releases / build_element_call (pull_request) Has been cancelled
Build & publish embedded packages for releases / Publish tarball (pull_request) Has been cancelled
Build & publish embedded packages for releases / Publish NPM (pull_request) Has been cancelled
Build & publish embedded packages for releases / Publish Android AAR (pull_request) Has been cancelled
Build & publish embedded packages for releases / Publish SwiftPM Library (pull_request) Has been cancelled
Build & publish embedded packages for releases / Update release notes (pull_request) Has been cancelled

settings
This commit is contained in:
mk
2026-03-28 12:09:07 -03:00
parent 385ab0a0ed
commit fd39daae5e
10 changed files with 979 additions and 7 deletions

30
public/vad/ten_vad.js Normal file
View File

@@ -0,0 +1,30 @@
var createVADModule = (() => {
var _scriptDir = import.meta.url;
return (
function(createVADModule) {
createVADModule = createVADModule || {};
var a;a||(a=typeof createVADModule !== 'undefined' ? createVADModule : {});var k,l;a.ready=new Promise(function(b,c){k=b;l=c});var p=Object.assign({},a),r="object"==typeof window,u="function"==typeof importScripts,v="",w;
if(r||u)u?v=self.location.href:"undefined"!=typeof document&&document.currentScript&&(v=document.currentScript.src),_scriptDir&&(v=_scriptDir),0!==v.indexOf("blob:")?v=v.substr(0,v.replace(/[?#].*/,"").lastIndexOf("/")+1):v="",u&&(w=b=>{var c=new XMLHttpRequest;c.open("GET",b,!1);c.responseType="arraybuffer";c.send(null);return new Uint8Array(c.response)});var aa=a.print||console.log.bind(console),x=a.printErr||console.warn.bind(console);Object.assign(a,p);p=null;var y;a.wasmBinary&&(y=a.wasmBinary);
var noExitRuntime=a.noExitRuntime||!0;"object"!=typeof WebAssembly&&z("no native wasm support detected");var A,B=!1,C="undefined"!=typeof TextDecoder?new TextDecoder("utf8"):void 0,D,E,F;function J(){var b=A.buffer;D=b;a.HEAP8=new Int8Array(b);a.HEAP16=new Int16Array(b);a.HEAP32=new Int32Array(b);a.HEAPU8=E=new Uint8Array(b);a.HEAPU16=new Uint16Array(b);a.HEAPU32=F=new Uint32Array(b);a.HEAPF32=new Float32Array(b);a.HEAPF64=new Float64Array(b)}var K=[],L=[],M=[];
function ba(){var b=a.preRun.shift();K.unshift(b)}var N=0,O=null,P=null;function z(b){if(a.onAbort)a.onAbort(b);b="Aborted("+b+")";x(b);B=!0;b=new WebAssembly.RuntimeError(b+". Build with -sASSERTIONS for more info.");l(b);throw b;}function Q(){return R.startsWith("data:application/octet-stream;base64,")}var R;if(a.locateFile){if(R="ten_vad.wasm",!Q()){var S=R;R=a.locateFile?a.locateFile(S,v):v+S}}else R=(new URL("ten_vad.wasm",import.meta.url)).href;
function T(){var b=R;try{if(b==R&&y)return new Uint8Array(y);if(w)return w(b);throw"both async and sync fetching of the wasm failed";}catch(c){z(c)}}function ca(){return y||!r&&!u||"function"!=typeof fetch?Promise.resolve().then(function(){return T()}):fetch(R,{credentials:"same-origin"}).then(function(b){if(!b.ok)throw"failed to load wasm binary file at '"+R+"'";return b.arrayBuffer()}).catch(function(){return T()})}function U(b){for(;0<b.length;)b.shift()(a)}
var da=[null,[],[]],ea={a:function(){z("")},f:function(b,c,m){E.copyWithin(b,c,c+m)},c:function(b){var c=E.length;b>>>=0;if(2147483648<b)return!1;for(var m=1;4>=m;m*=2){var h=c*(1+.2/m);h=Math.min(h,b+100663296);var d=Math;h=Math.max(b,h);d=d.min.call(d,2147483648,h+(65536-h%65536)%65536);a:{try{A.grow(d-D.byteLength+65535>>>16);J();var e=1;break a}catch(W){}e=void 0}if(e)return!0}return!1},e:function(){return 52},b:function(){return 70},d:function(b,c,m,h){for(var d=0,e=0;e<m;e++){var W=F[c>>2],
X=F[c+4>>2];c+=8;for(var G=0;G<X;G++){var f=E[W+G],H=da[b];if(0===f||10===f){f=H;for(var n=0,q=n+NaN,t=n;f[t]&&!(t>=q);)++t;if(16<t-n&&f.buffer&&C)f=C.decode(f.subarray(n,t));else{for(q="";n<t;){var g=f[n++];if(g&128){var I=f[n++]&63;if(192==(g&224))q+=String.fromCharCode((g&31)<<6|I);else{var Y=f[n++]&63;g=224==(g&240)?(g&15)<<12|I<<6|Y:(g&7)<<18|I<<12|Y<<6|f[n++]&63;65536>g?q+=String.fromCharCode(g):(g-=65536,q+=String.fromCharCode(55296|g>>10,56320|g&1023))}}else q+=String.fromCharCode(g)}f=q}(1===
b?aa:x)(f);H.length=0}else H.push(f)}d+=X}F[h>>2]=d;return 0}};
(function(){function b(d){a.asm=d.exports;A=a.asm.g;J();L.unshift(a.asm.h);N--;a.monitorRunDependencies&&a.monitorRunDependencies(N);0==N&&(null!==O&&(clearInterval(O),O=null),P&&(d=P,P=null,d()))}function c(d){b(d.instance)}function m(d){return ca().then(function(e){return WebAssembly.instantiate(e,h)}).then(function(e){return e}).then(d,function(e){x("failed to asynchronously prepare wasm: "+e);z(e)})}var h={a:ea};N++;a.monitorRunDependencies&&a.monitorRunDependencies(N);if(a.instantiateWasm)try{return a.instantiateWasm(h,
b)}catch(d){x("Module.instantiateWasm callback failed with error: "+d),l(d)}(function(){return y||"function"!=typeof WebAssembly.instantiateStreaming||Q()||"function"!=typeof fetch?m(c):fetch(R,{credentials:"same-origin"}).then(function(d){return WebAssembly.instantiateStreaming(d,h).then(c,function(e){x("wasm streaming compile failed: "+e);x("falling back to ArrayBuffer instantiation");return m(c)})})})().catch(l);return{}})();
a.___wasm_call_ctors=function(){return(a.___wasm_call_ctors=a.asm.h).apply(null,arguments)};a._malloc=function(){return(a._malloc=a.asm.i).apply(null,arguments)};a._free=function(){return(a._free=a.asm.j).apply(null,arguments)};a._ten_vad_create=function(){return(a._ten_vad_create=a.asm.k).apply(null,arguments)};a._ten_vad_process=function(){return(a._ten_vad_process=a.asm.l).apply(null,arguments)};a._ten_vad_destroy=function(){return(a._ten_vad_destroy=a.asm.m).apply(null,arguments)};
a._ten_vad_get_version=function(){return(a._ten_vad_get_version=a.asm.n).apply(null,arguments)};var V;P=function fa(){V||Z();V||(P=fa)};
function Z(){function b(){if(!V&&(V=!0,a.calledRun=!0,!B)){U(L);k(a);if(a.onRuntimeInitialized)a.onRuntimeInitialized();if(a.postRun)for("function"==typeof a.postRun&&(a.postRun=[a.postRun]);a.postRun.length;){var c=a.postRun.shift();M.unshift(c)}U(M)}}if(!(0<N)){if(a.preRun)for("function"==typeof a.preRun&&(a.preRun=[a.preRun]);a.preRun.length;)ba();U(K);0<N||(a.setStatus?(a.setStatus("Running..."),setTimeout(function(){setTimeout(function(){a.setStatus("")},1);b()},1)):b())}}
if(a.preInit)for("function"==typeof a.preInit&&(a.preInit=[a.preInit]);0<a.preInit.length;)a.preInit.pop()();Z();
return createVADModule.ready
}
);
})();
export default createVADModule;

BIN
public/vad/ten_vad.wasm Normal file

Binary file not shown.

View File

@@ -31,6 +31,7 @@ interface Props {
max: number;
step: number;
disabled?: boolean;
tooltip?: boolean;
}
/**
@@ -46,6 +47,7 @@ export const Slider: FC<Props> = ({
max,
step,
disabled,
tooltip = true,
}) => {
const onValueChange = useCallback(
([v]: number[]) => onValueChangeProp(v),
@@ -71,9 +73,13 @@ export const Slider: FC<Props> = ({
<Range className={styles.highlight} />
</Track>
{/* Note: This is expected not to be visible on mobile.*/}
{tooltip ? (
<Tooltip placement="top" label={Math.round(value * 100).toString() + "%"}>
<Thumb className={styles.handle} aria-label={label} />
</Tooltip>
) : (
<Thumb className={styles.handle} aria-label={label} />
)}
</Root>
);
};

View File

@@ -0,0 +1,391 @@
/*
Copyright 2026 New Vector Ltd.
SPDX-License-Identifier: AGPL-3.0-only OR LicenseRef-Element-Commercial
Please see LICENSE in the repository root for full details.
*/
declare const sampleRate: number;
declare class AudioWorkletProcessor {
public readonly port: MessagePort;
public constructor(options?: {
processorOptions?: Record<string, unknown>;
});
public process(
inputs: Float32Array[][],
outputs: Float32Array[][],
parameters: Record<string, Float32Array>,
): boolean;
}
declare function registerProcessor(
name: string,
// eslint-disable-next-line @typescript-eslint/no-explicit-any
processorCtor: new (...args: any[]) => AudioWorkletProcessor,
): void;
interface NoiseGateParams {
// TEN-VAD params
vadEnabled: boolean;
vadPositiveThreshold: number; // open gate when prob >= this (01)
vadNegativeThreshold: number; // close gate when prob < this (01)
vadMode: "standard" | "aggressive" | "loose";
holdMs: number; // hold time before closing gate (ms); 0 = no hold
}
interface VADGateMessage {
type: "vad-gate";
open: boolean;
}
/**
* Thin synchronous wrapper around the TEN-VAD Emscripten WASM module.
* Instantiated synchronously in the AudioWorklet constructor from a
* pre-compiled WebAssembly.Module passed via processorOptions.
*/
class TenVADRuntime {
private readonly mem: WebAssembly.Memory;
private readonly freeFn: (ptr: number) => void;
private readonly processFn: (
handle: number,
audioPtr: number,
hopSize: number,
probPtr: number,
flagPtr: number,
) => number;
private readonly destroyFn: (handle: number) => number;
private readonly handle: number;
private readonly audioBufPtr: number;
private readonly probPtr: number;
private readonly flagPtr: number;
public readonly hopSize: number;
public constructor(
module: WebAssembly.Module,
hopSize: number,
threshold: number,
) {
this.hopSize = hopSize;
// Late-bound memory reference — emscripten_resize_heap and memmove
// are only called after instantiation, so closing over this is safe.
const state = { mem: null as WebAssembly.Memory | null };
const imports = {
a: {
// abort
a: (): never => {
throw new Error("ten_vad abort");
},
// fd_write / proc_exit stub
b: (): number => 0,
// emscripten_resize_heap
c: (reqBytes: number): number => {
if (!state.mem) return 0;
try {
const cur = state.mem.buffer.byteLength;
if (cur >= reqBytes) return 1;
state.mem.grow(Math.ceil((reqBytes - cur) / 65536));
return 1;
} catch {
return 0;
}
},
// fd_write stub
d: (): number => 0,
// environ stub
e: (): number => 0,
// memmove
f: (dest: number, src: number, len: number): void => {
if (state.mem) {
new Uint8Array(state.mem.buffer).copyWithin(dest, src, src + len);
}
},
},
};
// Synchronous instantiation — valid in Worker/AudioWorklet global scope
// eslint-disable-next-line @typescript-eslint/no-explicit-any
const instance = new WebAssembly.Instance(module, imports as any);
const asm = instance.exports as {
g: WebAssembly.Memory; // exported memory
h: () => void; // __wasm_call_ctors
i: (n: number) => number; // malloc
j: (p: number) => void; // free
k: (handlePtr: number, hopSize: number, threshold: number) => number; // ten_vad_create
l: (handle: number, audioPtr: number, hopSize: number, probPtr: number, flagPtr: number) => number; // ten_vad_process
m: (handle: number) => number; // ten_vad_destroy
};
state.mem = asm.g;
this.mem = asm.g;
this.freeFn = asm.j;
this.processFn = asm.l;
this.destroyFn = asm.m;
// Run Emscripten static constructors
asm.h();
// Allocate persistent buffers (malloc is 8-byte aligned, so alignment is fine)
this.audioBufPtr = asm.i(hopSize * 2); // Int16Array
this.probPtr = asm.i(4); // float
this.flagPtr = asm.i(4); // int
// Create VAD handle — ten_vad_create(void** handle, int hopSize, float threshold)
const handlePtrPtr = asm.i(4);
const ret = asm.k(handlePtrPtr, hopSize, threshold);
if (ret !== 0) throw new Error(`ten_vad_create failed: ${ret}`);
this.handle = new Int32Array(this.mem.buffer)[handlePtrPtr >> 2];
asm.j(handlePtrPtr);
}
/** Process one hop of Int16 audio. Returns speech probability [01]. */
public process(samples: Int16Array): number {
new Int16Array(this.mem.buffer).set(samples, this.audioBufPtr >> 1);
this.processFn(
this.handle,
this.audioBufPtr,
this.hopSize,
this.probPtr,
this.flagPtr,
);
return new Float32Array(this.mem.buffer)[this.probPtr >> 2];
}
public destroy(): void {
this.destroyFn(this.handle);
this.freeFn(this.audioBufPtr);
this.freeFn(this.probPtr);
this.freeFn(this.flagPtr);
}
}
/**
* AudioWorkletProcessor implementing an in-worklet TEN-VAD gate running
* per-sample.
*
* TEN-VAD gate: accumulates audio with 3:1 decimation (48 kHz → 16 kHz),
* runs the TEN-VAD model synchronously every 256 samples (16 ms), and
* controls vadGateOpen with hysteresis. No IPC round-trip required.
* Asymmetric ramp: 5 ms open (minimise speech onset masking), 20 ms close
* (de-click on silence).
*/
class NoiseGateProcessor extends AudioWorkletProcessor {
// VAD gate state
private vadGateOpen = true; // starts open; TEN-VAD closes it on first silent frame
private vadAttenuation = 1.0;
// Asymmetric ramp rates — recomputed in updateParams based on vadAggressive
private vadOpenRampRate = 1.0 / (0.005 * sampleRate); // default: 5 ms
private vadCloseRampRate = 1.0 / (0.02 * sampleRate); // default: 20 ms
// TEN-VAD state
private vadEnabled = false;
private vadPositiveThreshold = 0.5;
private vadNegativeThreshold = 0.3;
private holdMs = 0;
private vadHoldHops = 0; // hold expressed in VAD hops
private vadHoldCounter = 0; // hops of continuous sub-threshold signal while gate is open
private tenVadRuntime: TenVADRuntime | null = null;
private tenVadModule: WebAssembly.Module | undefined = undefined;
// 3:1 decimation from AudioContext sample rate to 16 kHz
private readonly decRatio = Math.max(1, Math.round(sampleRate / 16000));
private decPhase = 0;
private decAcc = 0;
// Buffer sized for max hop (256); vadHopSize tracks how many samples to collect
private readonly vadHopBuf = new Int16Array(256);
private vadHopSize = 256; // standard: 256 (16 ms), aggressive: 160 (10 ms)
private vadHopCount = 0;
private logCounter = 0;
public constructor(options?: {
processorOptions?: Record<string, unknown>;
}) {
super(options);
// Try to instantiate TEN-VAD from the pre-compiled module passed by the main thread
this.tenVadModule = options?.processorOptions?.tenVadModule as
| WebAssembly.Module
| undefined;
if (this.tenVadModule) {
try {
// Default: standard mode — 256 samples @ 16 kHz = 16 ms
this.tenVadRuntime = new TenVADRuntime(this.tenVadModule, 256, 0.5);
this.port.postMessage({
type: "log",
msg: "[NoiseGate worklet] TEN-VAD runtime initialized, decRatio=" + this.decRatio,
});
} catch (e) {
this.port.postMessage({
type: "log",
msg: "[NoiseGate worklet] TEN-VAD init failed: " + String(e),
});
}
}
this.port.onmessage = (
e: MessageEvent<NoiseGateParams | VADGateMessage>,
): void => {
if ((e.data as VADGateMessage).type === "vad-gate") {
this.vadGateOpen = (e.data as VADGateMessage).open;
} else {
this.updateParams(e.data as NoiseGateParams);
}
};
this.updateParams({
vadEnabled: false,
vadPositiveThreshold: 0.5,
vadNegativeThreshold: 0.3,
vadMode: "standard",
holdMs: 0,
});
this.port.postMessage({
type: "log",
msg: "[NoiseGate worklet] constructor called, sampleRate=" + sampleRate,
});
}
private updateParams(p: NoiseGateParams): void {
this.vadEnabled = p.vadEnabled ?? false;
this.vadPositiveThreshold = p.vadPositiveThreshold ?? 0.5;
this.vadNegativeThreshold = p.vadNegativeThreshold ?? 0.3;
this.holdMs = p.holdMs ?? 0;
const newMode = p.vadMode ?? "standard";
if (newMode === "aggressive") {
this.vadOpenRampRate = 1.0 / (0.001 * sampleRate); // 1 ms
this.vadCloseRampRate = 1.0 / (0.005 * sampleRate); // 5 ms
} else if (newMode === "loose") {
this.vadOpenRampRate = 1.0 / (0.012 * sampleRate); // 12 ms
this.vadCloseRampRate = 1.0 / (0.032 * sampleRate); // 32 ms
} else {
this.vadOpenRampRate = 1.0 / (0.005 * sampleRate); // 5 ms
this.vadCloseRampRate = 1.0 / (0.02 * sampleRate); // 20 ms
}
// Hop size: aggressive=160 (10 ms @ 16 kHz), others=256 (16 ms)
const newHopSize = newMode === "aggressive" ? 160 : 256;
if (newHopSize !== this.vadHopSize && this.tenVadModule) {
this.tenVadRuntime?.destroy();
this.tenVadRuntime = null;
this.vadHopCount = 0;
try {
this.tenVadRuntime = new TenVADRuntime(this.tenVadModule, newHopSize, 0.5);
} catch (e) {
this.port.postMessage({ type: "log", msg: "[NoiseGate worklet] TEN-VAD recreate failed: " + String(e) });
}
}
this.vadHopSize = newHopSize;
// Recompute hold in hops: ceil((holdMs / 1000) * 16000 / vadHopSize)
this.vadHoldHops = this.holdMs > 0
? Math.ceil((this.holdMs / 1000) * 16000 / this.vadHopSize)
: 0;
this.vadHoldCounter = 0;
if (!this.vadEnabled) this.vadGateOpen = true;
this.port.postMessage({
type: "log",
msg: "[NoiseGate worklet] params updated: vadEnabled=" + p.vadEnabled
+ " vadPos=" + p.vadPositiveThreshold
+ " vadNeg=" + p.vadNegativeThreshold
+ " vadMode=" + newMode
+ " holdMs=" + this.holdMs,
});
}
public process(inputs: Float32Array[][], outputs: Float32Array[][]): boolean {
const input = inputs[0];
const output = outputs[0];
if (!input || input.length === 0) return true;
const blockSize = input[0]?.length ?? 128;
for (let i = 0; i < blockSize; i++) {
// --- TEN-VAD in-worklet processing ---
// Accumulate raw mono samples with decRatio:1 decimation (48 kHz → 16 kHz).
// Every 256 output samples (16 ms) run the WASM VAD and update vadGateOpen.
if (this.vadEnabled && this.tenVadRuntime !== null) {
this.decAcc += input[0]?.[i] ?? 0;
this.decPhase++;
if (this.decPhase >= this.decRatio) {
this.decPhase = 0;
const avg = this.decAcc / this.decRatio;
this.decAcc = 0;
// Float32 [-1,1] → Int16 with clamping
const s16 =
avg >= 1.0
? 32767
: avg <= -1.0
? -32768
: (avg * 32767 + 0.5) | 0;
this.vadHopBuf[this.vadHopCount++] = s16;
if (this.vadHopCount >= this.vadHopSize) {
this.vadHopCount = 0;
const prob = this.tenVadRuntime.process(this.vadHopBuf);
if (prob >= this.vadPositiveThreshold) {
// Speech detected — open gate, reset hold counter
this.vadGateOpen = true;
this.vadHoldCounter = 0;
} else if (prob < this.vadNegativeThreshold) {
if (this.vadGateOpen) {
if (this.vadHoldHops === 0) {
this.vadGateOpen = false;
} else {
this.vadHoldCounter++;
if (this.vadHoldCounter >= this.vadHoldHops) {
this.vadGateOpen = false;
this.vadHoldCounter = 0;
}
}
}
} else {
// Ambiguous zone — reset hold counter so hold only fires on sustained silence
this.vadHoldCounter = 0;
}
}
}
}
// Asymmetric ramp: fast open (5 ms) to minimise speech onset masking,
// slow close (20 ms) to de-click on silence transitions.
const vadTarget = this.vadGateOpen ? 1.0 : 0.0;
if (this.vadAttenuation < vadTarget) {
this.vadAttenuation = Math.min(
vadTarget,
this.vadAttenuation + this.vadOpenRampRate,
);
} else if (this.vadAttenuation > vadTarget) {
this.vadAttenuation = Math.max(
vadTarget,
this.vadAttenuation - this.vadCloseRampRate,
);
}
const gain = this.vadAttenuation;
for (let c = 0; c < output.length; c++) {
const inCh = input[c] ?? input[0];
const outCh = output[c];
if (inCh && outCh) {
outCh[i] = (inCh[i] ?? 0) * gain;
}
}
}
this.logCounter++;
if (this.logCounter % 375 === 0) {
this.port.postMessage({
type: "log",
msg: "[NoiseGate worklet] vadOpen=" + this.vadGateOpen
+ " vadAtten=" + this.vadAttenuation.toFixed(3),
});
}
return true;
}
}
registerProcessor("noise-gate-processor", NoiseGateProcessor);

View File

@@ -0,0 +1,163 @@
/*
Copyright 2026 New Vector Ltd.
SPDX-License-Identifier: AGPL-3.0-only OR LicenseRef-Element-Commercial
Please see LICENSE in the repository root for full details.
*/
import { type Track } from "livekit-client";
import { logger } from "matrix-js-sdk/lib/logger";
// ?worker&url tells Vite to compile the TypeScript worklet and return its URL.
// Without this, Vite copies the .ts file verbatim and the browser rejects it.
import compiledWorkletUrl from "./NoiseGateProcessor.worklet.ts?worker&url";
const log = logger.getChild("[NoiseGateTransformer]");
export interface NoiseGateParams {
// TEN-VAD params — processed entirely inside the AudioWorklet
vadEnabled: boolean;
vadPositiveThreshold: number; // open gate when prob >= this (01)
vadNegativeThreshold: number; // close gate when prob < this (01); computed by Publisher
vadMode: "standard" | "aggressive" | "loose";
holdMs: number; // hold time before closing gate (ms); 0 = no hold
}
/**
* Matches LiveKit's AudioProcessorOptions (experimental API, not publicly
* exported, so we declare it locally based on the type definitions).
*/
interface AudioProcessorOptions {
kind: Track.Kind.Audio;
track: MediaStreamTrack;
audioContext: AudioContext;
element?: HTMLMediaElement;
}
/**
* Matches LiveKit's TrackProcessor<Track.Kind.Audio> interface.
*/
export interface AudioTrackProcessor {
name: string;
processedTrack?: MediaStreamTrack;
init(opts: AudioProcessorOptions): Promise<void>;
restart(opts: AudioProcessorOptions): Promise<void>;
destroy(): Promise<void>;
}
// Cached compiled TEN-VAD module — compiled once, reused across processor restarts.
let tenVadModulePromise: Promise<WebAssembly.Module> | null = null;
function getTenVADModule(): Promise<WebAssembly.Module> {
if (!tenVadModulePromise) {
tenVadModulePromise = fetch("/vad/ten_vad.wasm")
.then((r) => {
if (!r.ok) throw new Error(`Failed to fetch ten_vad.wasm: ${r.status}`);
return r.arrayBuffer();
})
.then((buf) => WebAssembly.compile(buf))
.catch((e) => {
// Clear the cache so a retry is possible on next attach
tenVadModulePromise = null;
throw e;
});
}
return tenVadModulePromise;
}
/**
* LiveKit audio track processor that applies TEN-VAD via AudioWorklet.
*
* The TEN-VAD WASM module is fetched once, compiled, and passed to the worklet
* via processorOptions so it runs synchronously inside the audio thread —
* no IPC round-trip, ~16 ms VAD latency.
*
* Audio graph: sourceNode → workletNode → destinationNode
* processedTrack is destinationNode.stream.getAudioTracks()[0]
*/
export class NoiseGateTransformer implements AudioTrackProcessor {
public readonly name = "noise-gate";
public processedTrack?: MediaStreamTrack;
private workletNode?: AudioWorkletNode;
private sourceNode?: MediaStreamAudioSourceNode;
private destinationNode?: MediaStreamAudioDestinationNode;
private params: NoiseGateParams;
public constructor(params: NoiseGateParams) {
this.params = { ...params };
}
public async init(opts: AudioProcessorOptions): Promise<void> {
const { track, audioContext } = opts;
log.info("init() called, audioContext state:", audioContext.state, "params:", this.params);
// Fetch and compile the TEN-VAD WASM module (cached after first call)
let tenVadModule: WebAssembly.Module | undefined;
try {
tenVadModule = await getTenVADModule();
log.info("TEN-VAD WASM module compiled");
} catch (e) {
log.warn("TEN-VAD WASM module unavailable — VAD disabled:", e);
}
log.info("loading worklet from:", compiledWorkletUrl);
await audioContext.audioWorklet.addModule(compiledWorkletUrl);
log.info("worklet module loaded");
this.workletNode = new AudioWorkletNode(
audioContext,
"noise-gate-processor",
{
processorOptions: {
tenVadModule,
},
},
);
this.workletNode.port.onmessage = (
e: MessageEvent<{ type: string; msg: string }>,
): void => {
if (e.data?.type === "log") log.debug(e.data.msg);
};
this.sendParams();
this.sourceNode = audioContext.createMediaStreamSource(
new MediaStream([track]),
);
this.destinationNode = audioContext.createMediaStreamDestination();
this.sourceNode.connect(this.workletNode);
this.workletNode.connect(this.destinationNode);
this.processedTrack = this.destinationNode.stream.getAudioTracks()[0];
log.info("graph wired, processedTrack:", this.processedTrack);
}
public async restart(opts: AudioProcessorOptions): Promise<void> {
await this.destroy();
await this.init(opts);
}
// eslint-disable-next-line @typescript-eslint/require-await
public async destroy(): Promise<void> {
this.sourceNode?.disconnect();
this.workletNode?.disconnect();
this.destinationNode?.disconnect();
this.sourceNode = undefined;
this.workletNode = undefined;
this.destinationNode = undefined;
this.processedTrack = undefined;
}
/** Push updated gate/VAD parameters to the running worklet. */
public updateParams(params: NoiseGateParams): void {
this.params = { ...params };
this.sendParams();
}
private sendParams(): void {
if (!this.workletNode) return;
log.debug("sendParams:", this.params);
this.workletNode.port.postMessage(this.params);
}
}

View File

@@ -21,7 +21,8 @@ Please see LICENSE in the repository root for full details.
margin-top: var(--cpd-space-2x);
}
.volumeSlider > label {
.volumeSlider > label,
.sliderLabel {
margin-bottom: var(--cpd-space-1x);
display: block;
}
@@ -33,3 +34,40 @@ Please see LICENSE in the repository root for full details.
.volumeSlider > p {
color: var(--cpd-color-text-secondary);
}
.noiseGateSection {
margin-block-start: var(--cpd-space-6x);
}
.noiseGateHeading {
color: var(--cpd-color-text-secondary);
margin-block: var(--cpd-space-3x) 0;
}
.thresholdSlider {
margin-block-start: calc(-32px + var(--cpd-space-2x));
}
.noiseGateSeparator {
margin-block: 6px var(--cpd-space-4x);
}
.advancedGate {
margin-top: var(--cpd-space-3x);
}
.advancedGateToggle {
all: unset;
cursor: pointer;
font: var(--cpd-font-body-sm-semibold);
color: var(--cpd-color-text-secondary);
user-select: none;
}
.advancedGateToggle:hover {
color: var(--cpd-color-text-primary);
}
.restoreDefaults {
margin-top: var(--cpd-space-6x);
}

View File

@@ -5,10 +5,19 @@ SPDX-License-Identifier: AGPL-3.0-only OR LicenseRef-Element-Commercial
Please see LICENSE in the repository root for full details.
*/
import { type FC, type ReactNode, useEffect, useState } from "react";
import { type FC, type ReactNode, useEffect, useId, useState } from "react";
import { useTranslation } from "react-i18next";
import { type MatrixClient } from "matrix-js-sdk";
import { Button, Root as Form, Separator } from "@vector-im/compound-web";
import {
Button,
Heading,
HelpMessage,
InlineField,
Label,
RadioControl,
Root as Form,
Separator,
} from "@vector-im/compound-web";
import { type Room as LivekitRoom } from "livekit-client";
import { Modal } from "../Modal";
@@ -24,6 +33,13 @@ import {
soundEffectVolume as soundEffectVolumeSetting,
backgroundBlur as backgroundBlurSetting,
developerMode,
vadEnabled as vadEnabledSetting,
vadPositiveThreshold as vadPositiveThresholdSetting,
vadMode as vadModeSetting,
vadAdvancedEnabled as vadAdvancedEnabledSetting,
vadAdvancedOpenThreshold as vadAdvancedOpenThresholdSetting,
vadAdvancedCloseThreshold as vadAdvancedCloseThresholdSetting,
vadHoldTime as vadHoldTimeSetting,
} from "./settings";
import { PreferencesSettingsTab } from "./PreferencesSettingsTab";
import { Slider } from "../Slider";
@@ -107,6 +123,26 @@ export const SettingsModal: FC<Props> = ({
const [soundVolumeRaw, setSoundVolumeRaw] = useState(soundVolume);
const [showDeveloperSettingsTab] = useSetting(developerMode);
// Voice activity detection
const vadStateGroup = useId();
const vadModeRadioGroup = useId();
const [vadActive, setVadActive] = useSetting(vadEnabledSetting);
const [vadSensitivity, setVadSensitivity] = useSetting(vadPositiveThresholdSetting);
const [vadSensitivityRaw, setVadSensitivityRaw] = useState(vadSensitivity);
const [vadAdvanced, setVadAdvanced] = useSetting(vadAdvancedEnabledSetting);
const vadState = !vadActive ? "disabled" : vadAdvanced ? "advanced" : "simple";
const setVadState = (s: "disabled" | "simple" | "advanced"): void => {
setVadActive(s !== "disabled");
setVadAdvanced(s === "advanced");
};
const [vadModeValue, setVadModeValue] = useSetting(vadModeSetting);
const [vadAdvOpen, setVadAdvOpen] = useSetting(vadAdvancedOpenThresholdSetting);
const [vadAdvOpenRaw, setVadAdvOpenRaw] = useState(vadAdvOpen);
const [vadAdvClose, setVadAdvClose] = useSetting(vadAdvancedCloseThresholdSetting);
const [vadAdvCloseRaw, setVadAdvCloseRaw] = useState(vadAdvClose);
const [vadHold, setVadHold] = useSetting(vadHoldTimeSetting);
const [vadHoldRaw, setVadHoldRaw] = useState(vadHold);
const { available: isRageshakeAvailable } = useSubmitRageshake();
// For controlled devices, we will not show the input section:
@@ -165,6 +201,189 @@ export const SettingsModal: FC<Props> = ({
/>
</div>
</Form>
<div className={styles.noiseGateSection}>
<Heading
type="body"
weight="semibold"
size="sm"
as="h4"
className={styles.noiseGateHeading}
>
Voice Activity Detection
</Heading>
<Separator className={styles.noiseGateSeparator} />
<Form>
<InlineField
name={vadStateGroup}
control={
<RadioControl
checked={vadState === "disabled"}
value="disabled"
onChange={(): void => setVadState("disabled")}
/>
}
>
<Label>Disabled</Label>
</InlineField>
<InlineField
name={vadStateGroup}
control={
<RadioControl
checked={vadState === "simple"}
value="simple"
onChange={(): void => setVadState("simple")}
/>
}
>
<Label>Simple</Label>
</InlineField>
<InlineField
name={vadStateGroup}
control={
<RadioControl
checked={vadState === "advanced"}
value="advanced"
onChange={(): void => setVadState("advanced")}
/>
}
>
<Label>Advanced</Label>
</InlineField>
</Form>
{vadState !== "disabled" && (
<>
{vadState === "simple" && (
<div className={styles.volumeSlider}>
<span className={styles.sliderLabel}>
Sensitivity: {Math.round(vadSensitivityRaw * 100)}%
</span>
<p>Higher values require more confident speech detection before opening.</p>
<Slider
label="VAD sensitivity"
value={vadSensitivityRaw}
onValueChange={setVadSensitivityRaw}
onValueCommit={setVadSensitivity}
min={0.1}
max={1.0}
step={0.05}
tooltip={false}
/>
</div>
)}
{vadState === "advanced" && (
<>
<span className={styles.sliderLabel} style={{ display: "block", marginTop: "24px", marginBottom: "4px" }}>Ramp profiles</span>
<Form style={{ marginTop: "0" }}>
<InlineField
name={vadModeRadioGroup}
control={
<RadioControl
checked={vadModeValue === "loose"}
value="loose"
onChange={(): void => setVadModeValue("loose")}
/>
}
>
<Label>Loose</Label>
<HelpMessage>256 samples / 16 ms 12 ms open / 32 ms close ramp.</HelpMessage>
</InlineField>
<InlineField
name={vadModeRadioGroup}
control={
<RadioControl
checked={vadModeValue === "standard"}
value="standard"
onChange={(): void => setVadModeValue("standard")}
/>
}
>
<Label>Standard</Label>
<HelpMessage>256 samples / 16 ms 5 ms open / 20 ms close ramp.</HelpMessage>
</InlineField>
<InlineField
name={vadModeRadioGroup}
control={
<RadioControl
checked={vadModeValue === "aggressive"}
value="aggressive"
onChange={(): void => setVadModeValue("aggressive")}
/>
}
>
<Label>Aggressive</Label>
<HelpMessage>160 samples / 10 ms 1 ms open / 5 ms close ramp.</HelpMessage>
</InlineField>
</Form>
<div className={`${styles.volumeSlider} ${styles.thresholdSlider}`} style={{ marginTop: "24px" }}>
<span className={styles.sliderLabel}>
Open threshold: {Math.round(vadAdvOpenRaw * 100)}%
</span>
<p>Minimum confidence required to open the gate.</p>
<Slider
label="VAD open threshold"
value={vadAdvOpenRaw}
onValueChange={setVadAdvOpenRaw}
onValueCommit={setVadAdvOpen}
min={0.1}
max={0.95}
step={0.05}
tooltip={false}
/>
</div>
<div className={styles.volumeSlider}>
<span className={styles.sliderLabel}>
Close threshold: {Math.round(vadAdvCloseRaw * 100)}%
</span>
<p>Probability must drop below this to start the hold/close sequence.</p>
<Slider
label="VAD close threshold"
value={vadAdvCloseRaw}
onValueChange={setVadAdvCloseRaw}
onValueCommit={setVadAdvClose}
min={0.05}
max={0.9}
step={0.05}
tooltip={false}
/>
</div>
<div className={styles.volumeSlider} style={{ marginTop: "24px" }}>
<span className={styles.sliderLabel}>
Hold time: {vadHoldRaw} ms
</span>
<p>How long to keep the gate open after speech drops below the close threshold.</p>
<Slider
label="VAD hold time"
value={vadHoldRaw}
onValueChange={setVadHoldRaw}
onValueCommit={setVadHold}
min={0}
max={2000}
step={50}
tooltip={false}
/>
</div>
<div className={styles.restoreDefaults}>
<Button
kind="secondary"
size="sm"
onClick={(): void => {
const defOpen = vadAdvancedOpenThresholdSetting.defaultValue;
const defClose = vadAdvancedCloseThresholdSetting.defaultValue;
const defHold = vadHoldTimeSetting.defaultValue;
setVadAdvOpen(defOpen); setVadAdvOpenRaw(defOpen);
setVadAdvClose(defClose); setVadAdvCloseRaw(defClose);
setVadHold(defHold); setVadHoldRaw(defHold);
setVadModeValue("standard");
}}
>
Restore defaults
</Button>
</div>
</>
)}
</>
)}
</div>
</>
),
};

View File

@@ -129,6 +129,17 @@ export const alwaysShowIphoneEarpiece = new Setting<boolean>(
false,
);
export const vadEnabled = new Setting<boolean>("vad-enabled", false);
// Simple mode: single sensitivity slider (open threshold); close = open - 0.1
export const vadPositiveThreshold = new Setting<number>("vad-positive-threshold", 0.7);
// standard: 5ms/20ms aggressive: 1ms/5ms loose: 12ms/32ms
export const vadMode = new Setting<"standard" | "aggressive" | "loose">("vad-mode", "standard");
// Advanced settings (override simple mode when enabled)
export const vadAdvancedEnabled = new Setting<boolean>("vad-advanced-enabled", false);
export const vadAdvancedOpenThreshold = new Setting<number>("vad-advanced-open-threshold", 0.7);
export const vadAdvancedCloseThreshold = new Setting<number>("vad-advanced-close-threshold", 0.6);
export const vadHoldTime = new Setting<number>("vad-hold-time", 300);
export enum MatrixRTCMode {
Legacy = "legacy",
Compatibility = "compatibility",

View File

@@ -7,6 +7,7 @@ Please see LICENSE in the repository root for full details.
*/
import {
ConnectionState as LivekitConnectionState,
LocalAudioTrack,
type LocalTrackPublication,
LocalVideoTrack,
ParticipantEvent,
@@ -14,6 +15,7 @@ import {
Track,
} from "livekit-client";
import {
combineLatest,
map,
NEVER,
type Observable,
@@ -30,6 +32,19 @@ import {
trackProcessorSync,
} from "../../../livekit/TrackProcessorContext.tsx";
import { getUrlParams } from "../../../UrlParams.ts";
import {
vadEnabled,
vadPositiveThreshold,
vadMode,
vadAdvancedEnabled,
vadAdvancedOpenThreshold,
vadAdvancedCloseThreshold,
vadHoldTime,
} from "../../../settings/settings.ts";
import {
type NoiseGateParams,
NoiseGateTransformer,
} from "../../../livekit/NoiseGateTransformer.ts";
import { observeTrackReference$ } from "../../observeTrackReference";
import { type Connection } from "../remoteMembers/Connection.ts";
import { ObservableScope } from "../../ObservableScope.ts";
@@ -73,6 +88,8 @@ export class Publisher {
// Setup track processor syncing (blur)
this.observeTrackProcessors(this.scope, room, trackerProcessorState$);
// Setup noise gate on the local microphone track
this.applyNoiseGate(this.scope, room);
// Observe media device changes and update LiveKit active devices accordingly
this.observeMediaDevices(this.scope, devices, controlledAudioDevices);
@@ -400,6 +417,103 @@ export class Publisher {
});
}
private applyNoiseGate(scope: ObservableScope, room: LivekitRoom): void {
// Observe the local microphone track
const audioTrack$ = scope.behavior(
observeTrackReference$(
room.localParticipant,
Track.Source.Microphone,
).pipe(
map((ref) => {
const track = ref?.publication.track;
return track instanceof LocalAudioTrack ? track : null;
}),
),
null,
);
let transformer: NoiseGateTransformer | null = null;
let audioCtx: AudioContext | null = null;
const currentParams = (): NoiseGateParams => {
const isAdvanced = vadAdvancedEnabled.getValue();
if (isAdvanced) {
return {
vadEnabled: vadEnabled.getValue(),
vadPositiveThreshold: vadAdvancedOpenThreshold.getValue(),
vadNegativeThreshold: vadAdvancedCloseThreshold.getValue(),
vadMode: vadMode.getValue(),
holdMs: vadHoldTime.getValue(),
};
}
const openT = vadPositiveThreshold.getValue();
return {
vadEnabled: vadEnabled.getValue(),
vadPositiveThreshold: openT,
vadNegativeThreshold: Math.max(0, openT - 0.1),
vadMode: "standard",
holdMs: 0,
};
};
// Attach / detach processor when VAD is toggled or the track changes.
combineLatest([audioTrack$, vadEnabled.value$])
.pipe(scope.bind())
.subscribe(([audioTrack, vadActive]) => {
if (!audioTrack) return;
const shouldAttach = vadActive;
if (shouldAttach && !audioTrack.getProcessor()) {
const params = currentParams();
this.logger.info("[NoiseGate] attaching processor, params:", params);
transformer = new NoiseGateTransformer(params);
audioCtx = new AudioContext();
this.logger.info("[NoiseGate] AudioContext state before resume:", audioCtx.state);
// eslint-disable-next-line @typescript-eslint/no-explicit-any
(audioTrack as any).setAudioContext(audioCtx);
audioCtx.resume().then(async () => {
this.logger.info("[NoiseGate] AudioContext state after resume:", audioCtx?.state);
// eslint-disable-next-line @typescript-eslint/no-explicit-any
return audioTrack.setProcessor(transformer as any);
}).then(() => {
this.logger.info("[NoiseGate] setProcessor resolved");
}).catch((e: unknown) => {
this.logger.error("[NoiseGate] setProcessor failed", e);
});
} else if (!shouldAttach && audioTrack.getProcessor()) {
this.logger.info("[NoiseGate] removing processor");
void audioTrack.stopProcessor();
void audioCtx?.close();
audioCtx = null;
transformer = null;
// eslint-disable-next-line @typescript-eslint/no-explicit-any
(audioTrack as any).setAudioContext(undefined);
} else if (shouldAttach && audioTrack.getProcessor()) {
// Processor already attached — push updated params (e.g. noiseGateActive toggled)
transformer?.updateParams(currentParams());
} else {
this.logger.info(
"[NoiseGate] tick — vadActive:", vadActive,
"hasProcessor:", !!audioTrack.getProcessor(),
);
}
});
// Push VAD param changes to the live worklet.
combineLatest([
vadEnabled.value$,
vadPositiveThreshold.value$,
vadMode.value$,
vadAdvancedEnabled.value$,
vadAdvancedOpenThreshold.value$,
vadAdvancedCloseThreshold.value$,
vadHoldTime.value$,
])
.pipe(scope.bind())
.subscribe(() => {
transformer?.updateParams(currentParams());
});
}
private observeTrackProcessors(
scope: ObservableScope,
room: LivekitRoom,

View File

@@ -7,7 +7,6 @@ Please see LICENSE in the repository root for full details.
import {
loadEnv,
PluginOption,
searchForWorkspaceRoot,
type ConfigEnv,
type UserConfig,
@@ -34,7 +33,8 @@ export default ({
// In future we might be able to do what is needed via code splitting at
// build time.
process.env.VITE_PACKAGE = packageType ?? "full";
const plugins: PluginOption[] = [
const plugins = [
react(),
svgrPlugin({
svgrOptions: {