diff --git a/public/vad/ten_vad.js b/public/vad/ten_vad.js new file mode 100644 index 00000000..c5615acd --- /dev/null +++ b/public/vad/ten_vad.js @@ -0,0 +1,30 @@ + +var createVADModule = (() => { + var _scriptDir = import.meta.url; + + return ( +function(createVADModule) { + createVADModule = createVADModule || {}; + + +var a;a||(a=typeof createVADModule !== 'undefined' ? createVADModule : {});var k,l;a.ready=new Promise(function(b,c){k=b;l=c});var p=Object.assign({},a),r="object"==typeof window,u="function"==typeof importScripts,v="",w; +if(r||u)u?v=self.location.href:"undefined"!=typeof document&&document.currentScript&&(v=document.currentScript.src),_scriptDir&&(v=_scriptDir),0!==v.indexOf("blob:")?v=v.substr(0,v.replace(/[?#].*/,"").lastIndexOf("/")+1):v="",u&&(w=b=>{var c=new XMLHttpRequest;c.open("GET",b,!1);c.responseType="arraybuffer";c.send(null);return new Uint8Array(c.response)});var aa=a.print||console.log.bind(console),x=a.printErr||console.warn.bind(console);Object.assign(a,p);p=null;var y;a.wasmBinary&&(y=a.wasmBinary); +var noExitRuntime=a.noExitRuntime||!0;"object"!=typeof WebAssembly&&z("no native wasm support detected");var A,B=!1,C="undefined"!=typeof TextDecoder?new TextDecoder("utf8"):void 0,D,E,F;function J(){var b=A.buffer;D=b;a.HEAP8=new Int8Array(b);a.HEAP16=new Int16Array(b);a.HEAP32=new Int32Array(b);a.HEAPU8=E=new Uint8Array(b);a.HEAPU16=new Uint16Array(b);a.HEAPU32=F=new Uint32Array(b);a.HEAPF32=new Float32Array(b);a.HEAPF64=new Float64Array(b)}var K=[],L=[],M=[]; +function ba(){var b=a.preRun.shift();K.unshift(b)}var N=0,O=null,P=null;function z(b){if(a.onAbort)a.onAbort(b);b="Aborted("+b+")";x(b);B=!0;b=new WebAssembly.RuntimeError(b+". 
Build with -sASSERTIONS for more info.");l(b);throw b;}function Q(){return R.startsWith("data:application/octet-stream;base64,")}var R;if(a.locateFile){if(R="ten_vad.wasm",!Q()){var S=R;R=a.locateFile?a.locateFile(S,v):v+S}}else R=(new URL("ten_vad.wasm",import.meta.url)).href; +function T(){var b=R;try{if(b==R&&y)return new Uint8Array(y);if(w)return w(b);throw"both async and sync fetching of the wasm failed";}catch(c){z(c)}}function ca(){return y||!r&&!u||"function"!=typeof fetch?Promise.resolve().then(function(){return T()}):fetch(R,{credentials:"same-origin"}).then(function(b){if(!b.ok)throw"failed to load wasm binary file at '"+R+"'";return b.arrayBuffer()}).catch(function(){return T()})}function U(b){for(;0>>=0;if(2147483648=m;m*=2){var h=c*(1+.2/m);h=Math.min(h,b+100663296);var d=Math;h=Math.max(b,h);d=d.min.call(d,2147483648,h+(65536-h%65536)%65536);a:{try{A.grow(d-D.byteLength+65535>>>16);J();var e=1;break a}catch(W){}e=void 0}if(e)return!0}return!1},e:function(){return 52},b:function(){return 70},d:function(b,c,m,h){for(var d=0,e=0;e>2], +X=F[c+4>>2];c+=8;for(var G=0;G=q);)++t;if(16g?q+=String.fromCharCode(g):(g-=65536,q+=String.fromCharCode(55296|g>>10,56320|g&1023))}}else q+=String.fromCharCode(g)}f=q}(1=== +b?aa:x)(f);H.length=0}else H.push(f)}d+=X}F[h>>2]=d;return 0}}; +(function(){function b(d){a.asm=d.exports;A=a.asm.g;J();L.unshift(a.asm.h);N--;a.monitorRunDependencies&&a.monitorRunDependencies(N);0==N&&(null!==O&&(clearInterval(O),O=null),P&&(d=P,P=null,d()))}function c(d){b(d.instance)}function m(d){return ca().then(function(e){return WebAssembly.instantiate(e,h)}).then(function(e){return e}).then(d,function(e){x("failed to asynchronously prepare wasm: "+e);z(e)})}var h={a:ea};N++;a.monitorRunDependencies&&a.monitorRunDependencies(N);if(a.instantiateWasm)try{return a.instantiateWasm(h, +b)}catch(d){x("Module.instantiateWasm callback failed with error: "+d),l(d)}(function(){return y||"function"!=typeof 
WebAssembly.instantiateStreaming||Q()||"function"!=typeof fetch?m(c):fetch(R,{credentials:"same-origin"}).then(function(d){return WebAssembly.instantiateStreaming(d,h).then(c,function(e){x("wasm streaming compile failed: "+e);x("falling back to ArrayBuffer instantiation");return m(c)})})})().catch(l);return{}})(); +a.___wasm_call_ctors=function(){return(a.___wasm_call_ctors=a.asm.h).apply(null,arguments)};a._malloc=function(){return(a._malloc=a.asm.i).apply(null,arguments)};a._free=function(){return(a._free=a.asm.j).apply(null,arguments)};a._ten_vad_create=function(){return(a._ten_vad_create=a.asm.k).apply(null,arguments)};a._ten_vad_process=function(){return(a._ten_vad_process=a.asm.l).apply(null,arguments)};a._ten_vad_destroy=function(){return(a._ten_vad_destroy=a.asm.m).apply(null,arguments)}; +a._ten_vad_get_version=function(){return(a._ten_vad_get_version=a.asm.n).apply(null,arguments)};var V;P=function fa(){V||Z();V||(P=fa)}; +function Z(){function b(){if(!V&&(V=!0,a.calledRun=!0,!B)){U(L);k(a);if(a.onRuntimeInitialized)a.onRuntimeInitialized();if(a.postRun)for("function"==typeof a.postRun&&(a.postRun=[a.postRun]);a.postRun.length;){var c=a.postRun.shift();M.unshift(c)}U(M)}}if(!(0 = ({ max, step, disabled, + tooltip = true, }) => { const onValueChange = useCallback( ([v]: number[]) => onValueChangeProp(v), @@ -71,9 +73,13 @@ export const Slider: FC = ({ {/* Note: This is expected not to be visible on mobile.*/} - + {tooltip ? ( + + + + ) : ( - + )} ); }; diff --git a/src/livekit/NoiseGateProcessor.worklet.ts b/src/livekit/NoiseGateProcessor.worklet.ts new file mode 100644 index 00000000..70d2d0db --- /dev/null +++ b/src/livekit/NoiseGateProcessor.worklet.ts @@ -0,0 +1,391 @@ +/* +Copyright 2026 New Vector Ltd. + +SPDX-License-Identifier: AGPL-3.0-only OR LicenseRef-Element-Commercial +Please see LICENSE in the repository root for full details. 
// Ambient declarations for the AudioWorklet global scope (not part of lib.dom).
declare const sampleRate: number;
declare class AudioWorkletProcessor {
  public readonly port: MessagePort;
  public constructor(options?: {
    processorOptions?: Record<string, unknown>;
  });
  public process(
    inputs: Float32Array[][],
    outputs: Float32Array[][],
    parameters: Record<string, Float32Array>,
  ): boolean;
}
declare function registerProcessor(
  name: string,
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  processorCtor: new (...args: any[]) => AudioWorkletProcessor,
): void;

/** Parameter message posted by the main thread to configure the gate. */
interface NoiseGateParams {
  // TEN-VAD params
  vadEnabled: boolean;
  vadPositiveThreshold: number; // open gate when prob >= this (0–1)
  vadNegativeThreshold: number; // close gate when prob < this (0–1)
  vadMode: "standard" | "aggressive" | "loose";
  holdMs: number; // hold time before closing gate (ms); 0 = no hold
}

/** Message that forces the gate open or closed from outside the worklet. */
interface VADGateMessage {
  type: "vad-gate";
  open: boolean;
}

/**
 * Thin synchronous wrapper around the TEN-VAD Emscripten WASM module.
 * Instantiated synchronously in the AudioWorklet constructor from a
 * pre-compiled WebAssembly.Module passed via processorOptions.
 *
 * The export names (`g`..`m`) and import names (`a.a`..`a.f`) are the minified
 * identifiers produced by the Emscripten build.
 * NOTE(review): the per-import role comments below are carried over from the
 * original code — confirm them against the shipped ten_vad.js glue.
 */
class TenVADRuntime {
  private readonly mem: WebAssembly.Memory;
  private readonly freeFn: (ptr: number) => void;
  private readonly processFn: (
    handle: number,
    audioPtr: number,
    hopSize: number,
    probPtr: number,
    flagPtr: number,
  ) => number;
  private readonly destroyFn: (handle: number) => number;
  private readonly handle: number;
  private readonly audioBufPtr: number;
  private readonly probPtr: number;
  private readonly flagPtr: number;
  private destroyed = false;
  public readonly hopSize: number;

  /**
   * @param module - Pre-compiled TEN-VAD module.
   * @param hopSize - Samples per VAD frame; must be a positive integer.
   * @param threshold - Passed straight through to `ten_vad_create`.
   * @throws RangeError if `hopSize` is not a positive integer.
   * @throws Error if allocation or `ten_vad_create` fails.
   */
  public constructor(
    module: WebAssembly.Module,
    hopSize: number,
    threshold: number,
  ) {
    if (!Number.isInteger(hopSize) || hopSize <= 0) {
      throw new RangeError(`ten_vad: invalid hop size ${hopSize}`);
    }
    this.hopSize = hopSize;

    // Late-bound memory reference — the heap-resize and memmove imports are
    // only called after instantiation, so closing over this is safe.
    const state: { mem: WebAssembly.Memory | null } = { mem: null };

    const imports: WebAssembly.Imports = {
      a: {
        // abort
        a: (): never => {
          throw new Error("ten_vad abort");
        },
        // fd_write / proc_exit stub
        b: (): number => 0,
        // emscripten_resize_heap
        c: (reqBytes: number): number => {
          const mem = state.mem;
          if (!mem) return 0;
          // WASM passes an i32; reinterpret as unsigned so requests above
          // 2 GiB are not seen as negative (and silently "satisfied").
          const wanted = reqBytes >>> 0;
          try {
            const cur = mem.buffer.byteLength;
            if (cur >= wanted) return 1;
            mem.grow(Math.ceil((wanted - cur) / 65536));
            return 1;
          } catch {
            return 0;
          }
        },
        // fd_write stub
        d: (): number => 0,
        // environ stub
        e: (): number => 0,
        // memmove
        f: (dest: number, src: number, len: number): void => {
          if (state.mem) {
            new Uint8Array(state.mem.buffer).copyWithin(dest, src, src + len);
          }
        },
      },
    };

    // Synchronous instantiation — valid in Worker/AudioWorklet global scope
    const instance = new WebAssembly.Instance(module, imports);
    const asm = instance.exports as {
      g: WebAssembly.Memory; // exported memory
      h: () => void; // __wasm_call_ctors
      i: (n: number) => number; // malloc
      j: (p: number) => void; // free
      k: (handlePtr: number, hopSize: number, threshold: number) => number; // ten_vad_create
      l: (
        handle: number,
        audioPtr: number,
        hopSize: number,
        probPtr: number,
        flagPtr: number,
      ) => number; // ten_vad_process
      m: (handle: number) => number; // ten_vad_destroy
    };

    state.mem = asm.g;
    this.mem = asm.g;
    this.freeFn = asm.j;
    this.processFn = asm.l;
    this.destroyFn = asm.m;

    // Run Emscripten static constructors
    asm.h();

    // malloc that refuses a null result instead of letting us write at address 0.
    const alloc = (bytes: number, what: string): number => {
      const ptr = asm.i(bytes) >>> 0;
      if (ptr === 0) {
        throw new Error(`ten_vad: out of memory allocating ${what}`);
      }
      return ptr;
    };

    // Persistent buffers, sized for exactly one hop.
    this.audioBufPtr = alloc(hopSize * 2, "audio buffer"); // int16[hopSize]
    this.probPtr = alloc(4, "probability"); // float
    this.flagPtr = alloc(4, "flag"); // int

    // Create VAD handle — ten_vad_create(void** handle, int hopSize, float threshold)
    const handlePtrPtr = alloc(4, "handle slot");
    const ret = asm.k(handlePtrPtr, hopSize, threshold);
    if (ret !== 0) throw new Error(`ten_vad_create failed: ${ret}`);
    // Build the view only after the call: a heap growth inside it would have
    // detached any earlier ArrayBuffer.
    this.handle = new Uint32Array(this.mem.buffer)[handlePtrPtr >>> 2];
    asm.j(handlePtrPtr);
  }

  /**
   * Process one hop of Int16 audio. Returns speech probability [0–1].
   *
   * Only the first `hopSize` samples are consumed. The WASM-side buffer holds
   * exactly `hopSize` samples, so copying a longer caller buffer wholesale
   * would overrun it and corrupt neighbouring allocations. A shorter input is
   * zero-padded. Returns 0 once the runtime has been destroyed.
   */
  public process(samples: Int16Array): number {
    if (this.destroyed) return 0;
    const hop = this.hopSize;
    // Fresh view per call — memory growth detaches previously created views.
    const dst = new Int16Array(this.mem.buffer, this.audioBufPtr, hop);
    if (samples.length >= hop) {
      dst.set(samples.subarray(0, hop));
    } else {
      dst.set(samples);
      dst.fill(0, samples.length);
    }
    this.processFn(
      this.handle,
      this.audioBufPtr,
      hop,
      this.probPtr,
      this.flagPtr,
    );
    return new Float32Array(this.mem.buffer, this.probPtr, 1)[0];
  }

  /** Destroy the VAD handle and free the persistent buffers. Safe to call twice. */
  public destroy(): void {
    if (this.destroyed) return;
    this.destroyed = true;
    this.destroyFn(this.handle);
    this.freeFn(this.audioBufPtr);
    this.freeFn(this.probPtr);
    this.freeFn(this.flagPtr);
  }
}
/**
 * AudioWorkletProcessor implementing an in-worklet TEN-VAD gate whose gain is
 * ramped per sample.
 *
 * Input channel 0 is box-averaged over `decRatio` samples (3:1 when the
 * context runs at 48 kHz), converted to Int16 and collected into hops of
 * `vadHopSize` samples. Each full hop is run through TEN-VAD synchronously and
 * the result drives `vadGateOpen` with hysteresis and an optional hold time.
 * The gate gain ramps asymmetrically: faster when opening than when closing.
 *
 * The gate fails open: whenever VAD is disabled or no runtime is available,
 * audio passes through unattenuated.
 */
class NoiseGateProcessor extends AudioWorkletProcessor {
  // VAD gate state
  private vadGateOpen = true; // starts open; TEN-VAD closes it on first silent frame
  private vadAttenuation = 1.0;
  // Asymmetric ramp rates (gain change per sample) — recomputed in
  // updateParams from the selected vadMode.
  private vadOpenRampRate = 1.0 / (0.005 * sampleRate); // default: 5 ms
  private vadCloseRampRate = 1.0 / (0.02 * sampleRate); // default: 20 ms

  // TEN-VAD state
  private vadEnabled = false;
  private vadPositiveThreshold = 0.5;
  private vadNegativeThreshold = 0.3;
  private holdMs = 0;
  private vadHoldHops = 0; // hold expressed in VAD hops
  private vadHoldCounter = 0; // hops of continuous sub-threshold signal while gate is open
  private tenVadRuntime: TenVADRuntime | null = null;
  private tenVadModule: WebAssembly.Module | undefined = undefined;
  // Decimation ratio from the AudioContext sample rate towards 16 kHz
  private readonly decRatio = Math.max(1, Math.round(sampleRate / 16000));
  private decPhase = 0;
  private decAcc = 0;
  // Buffer sized for the largest hop (256); vadHopSize is how many samples to collect
  private readonly vadHopBuf = new Int16Array(256);
  private vadHopSize = 256; // standard/loose: 256 (16 ms), aggressive: 160 (10 ms)
  // View over exactly the first vadHopSize samples of vadHopBuf. This is what
  // gets handed to the runtime, so a 160-sample hop never pushes 256 samples
  // into a 160-sample WASM buffer. Cached to avoid allocating per hop.
  private vadHopView: Int16Array = this.vadHopBuf;
  private vadHopCount = 0;

  private logCounter = 0;

  public constructor(options?: {
    processorOptions?: Record<string, unknown>;
  }) {
    super(options);

    // Try to instantiate TEN-VAD from the pre-compiled module passed by the main thread
    this.tenVadModule = options?.processorOptions?.tenVadModule as
      | WebAssembly.Module
      | undefined;
    if (this.tenVadModule) {
      try {
        // Default: standard mode — 256 samples @ 16 kHz = 16 ms
        this.tenVadRuntime = new TenVADRuntime(this.tenVadModule, 256, 0.5);
        this.port.postMessage({
          type: "log",
          msg:
            "[NoiseGate worklet] TEN-VAD runtime initialized, decRatio=" +
            this.decRatio,
        });
      } catch (e) {
        this.port.postMessage({
          type: "log",
          msg: "[NoiseGate worklet] TEN-VAD init failed: " + String(e),
        });
      }
    }

    this.port.onmessage = (
      e: MessageEvent<NoiseGateParams | VADGateMessage>,
    ): void => {
      if ((e.data as VADGateMessage).type === "vad-gate") {
        this.vadGateOpen = (e.data as VADGateMessage).open;
      } else {
        this.updateParams(e.data as NoiseGateParams);
      }
    };

    this.updateParams({
      vadEnabled: false,
      vadPositiveThreshold: 0.5,
      vadNegativeThreshold: 0.3,
      vadMode: "standard",
      holdMs: 0,
    });

    this.port.postMessage({
      type: "log",
      msg: "[NoiseGate worklet] constructor called, sampleRate=" + sampleRate,
    });
  }

  /**
   * Apply a parameter message. Missing fields fall back to defaults because
   * the message arrives untyped over the port.
   */
  private updateParams(p: NoiseGateParams): void {
    this.vadEnabled = p.vadEnabled ?? false;
    this.vadPositiveThreshold = p.vadPositiveThreshold ?? 0.5;
    this.vadNegativeThreshold = p.vadNegativeThreshold ?? 0.3;
    this.holdMs = p.holdMs ?? 0;

    const newMode = p.vadMode ?? "standard";
    if (newMode === "aggressive") {
      this.vadOpenRampRate = 1.0 / (0.001 * sampleRate); // 1 ms
      this.vadCloseRampRate = 1.0 / (0.005 * sampleRate); // 5 ms
    } else if (newMode === "loose") {
      this.vadOpenRampRate = 1.0 / (0.012 * sampleRate); // 12 ms
      this.vadCloseRampRate = 1.0 / (0.032 * sampleRate); // 32 ms
    } else {
      this.vadOpenRampRate = 1.0 / (0.005 * sampleRate); // 5 ms
      this.vadCloseRampRate = 1.0 / (0.02 * sampleRate); // 20 ms
    }

    // Hop size: aggressive=160 (10 ms @ 16 kHz), others=256 (16 ms)
    const newHopSize = newMode === "aggressive" ? 160 : 256;
    if (newHopSize !== this.vadHopSize) {
      // Drop any partially collected hop and resize the view given to the runtime.
      this.vadHopCount = 0;
      this.vadHopView = this.vadHopBuf.subarray(0, newHopSize);
      if (this.tenVadModule) {
        // The hop size is fixed at ten_vad_create time, so rebuild the runtime.
        this.tenVadRuntime?.destroy();
        this.tenVadRuntime = null;
        try {
          this.tenVadRuntime = new TenVADRuntime(
            this.tenVadModule,
            newHopSize,
            0.5,
          );
        } catch (e) {
          this.port.postMessage({
            type: "log",
            msg: "[NoiseGate worklet] TEN-VAD recreate failed: " + String(e),
          });
        }
      }
    }
    this.vadHopSize = newHopSize;

    // Recompute hold in hops: ceil((holdMs / 1000) * 16000 / vadHopSize)
    this.vadHoldHops =
      this.holdMs > 0
        ? Math.ceil(((this.holdMs / 1000) * 16000) / this.vadHopSize)
        : 0;
    this.vadHoldCounter = 0;

    // Fail open: with VAD off — or no runtime left to ever reopen the gate —
    // a closed gate would mute the microphone indefinitely.
    if (!this.vadEnabled || this.tenVadRuntime === null) {
      this.vadGateOpen = true;
    }
    this.port.postMessage({
      type: "log",
      msg:
        "[NoiseGate worklet] params updated: vadEnabled=" +
        p.vadEnabled +
        " vadPos=" +
        p.vadPositiveThreshold +
        " vadNeg=" +
        p.vadNegativeThreshold +
        " vadMode=" +
        newMode +
        " holdMs=" +
        this.holdMs,
    });
  }

  /** Update the gate from one VAD probability using hysteresis and hold. */
  private applyVadDecision(prob: number): void {
    if (prob >= this.vadPositiveThreshold) {
      // Speech detected — open gate, reset hold counter
      this.vadGateOpen = true;
      this.vadHoldCounter = 0;
      return;
    }
    if (prob >= this.vadNegativeThreshold) {
      // Ambiguous zone — reset hold counter so hold only fires on sustained silence
      this.vadHoldCounter = 0;
      return;
    }
    if (!this.vadGateOpen) return;
    if (this.vadHoldHops === 0) {
      this.vadGateOpen = false;
      return;
    }
    this.vadHoldCounter++;
    if (this.vadHoldCounter >= this.vadHoldHops) {
      this.vadGateOpen = false;
      this.vadHoldCounter = 0;
    }
  }

  /**
   * Render one block: feed channel 0 to the VAD and write the gated input to
   * every output channel. Always returns true.
   */
  public process(inputs: Float32Array[][], outputs: Float32Array[][]): boolean {
    const input = inputs[0];
    const output = outputs[0];
    if (!input || input.length === 0 || !output) return true;

    const mono = input[0];
    const blockSize = mono?.length ?? 128;
    // Parameters cannot change in the middle of a block, so resolve once.
    const runtime = this.vadEnabled ? this.tenVadRuntime : null;

    for (let i = 0; i < blockSize; i++) {
      // --- TEN-VAD in-worklet processing ---
      if (runtime !== null) {
        this.decAcc += mono?.[i] ?? 0;
        this.decPhase++;
        if (this.decPhase >= this.decRatio) {
          this.decPhase = 0;
          const avg = this.decAcc / this.decRatio;
          this.decAcc = 0;
          // Float32 [-1,1] → Int16 with clamping
          const s16 =
            avg >= 1.0
              ? 32767
              : avg <= -1.0
                ? -32768
                : (avg * 32767 + 0.5) | 0;
          this.vadHopBuf[this.vadHopCount++] = s16;

          if (this.vadHopCount >= this.vadHopSize) {
            this.vadHopCount = 0;
            this.applyVadDecision(runtime.process(this.vadHopView));
          }
        }
      }

      // Asymmetric ramp towards the gate target: quick to open so speech
      // onsets are not masked, slower to close to avoid clicks.
      const vadTarget = this.vadGateOpen ? 1.0 : 0.0;
      if (this.vadAttenuation < vadTarget) {
        this.vadAttenuation = Math.min(
          vadTarget,
          this.vadAttenuation + this.vadOpenRampRate,
        );
      } else if (this.vadAttenuation > vadTarget) {
        this.vadAttenuation = Math.max(
          vadTarget,
          this.vadAttenuation - this.vadCloseRampRate,
        );
      }

      const gain = this.vadAttenuation;

      for (let c = 0; c < output.length; c++) {
        // Output channels without a matching input channel reuse channel 0.
        const inCh = input[c] ?? mono;
        const outCh = output[c];
        if (inCh && outCh) {
          outCh[i] = (inCh[i] ?? 0) * gain;
        }
      }
    }

    // Periodic status log, once every 375 render blocks.
    this.logCounter++;
    if (this.logCounter % 375 === 0) {
      this.port.postMessage({
        type: "log",
        msg:
          "[NoiseGate worklet] vadOpen=" +
          this.vadGateOpen +
          " vadAtten=" +
          this.vadAttenuation.toFixed(3),
      });
    }

    return true;
  }
}

registerProcessor("noise-gate-processor", NoiseGateProcessor);
/**
 * Matches LiveKit's TrackProcessor interface.
 */
export interface AudioTrackProcessor {
  name: string;
  processedTrack?: MediaStreamTrack;
  init(opts: AudioProcessorOptions): Promise<void>;
  restart(opts: AudioProcessorOptions): Promise<void>;
  destroy(): Promise<void>;
}

// In-flight or settled compilation of the TEN-VAD module. Shared so the WASM
// is fetched and compiled once and reused across processor restarts.
let tenVadModulePromise: Promise<WebAssembly.Module> | null = null;

/**
 * Fetch and compile `/vad/ten_vad.wasm`, memoising the resulting promise.
 *
 * Concurrent and later callers share the same promise. If the fetch or the
 * compilation fails, the memo is cleared before the error is re-thrown so the
 * next call starts a fresh attempt.
 */
function getTenVADModule(): Promise<WebAssembly.Module> {
  if (tenVadModulePromise !== null) return tenVadModulePromise;

  const readBody = (response: Response): Promise<ArrayBuffer> => {
    if (response.ok) return response.arrayBuffer();
    throw new Error(`Failed to fetch ten_vad.wasm: ${response.status}`);
  };

  const forgetAndRethrow = (reason: unknown): never => {
    // Clear the cache so a retry is possible on next attach
    tenVadModulePromise = null;
    throw reason;
  };

  const pending = fetch("/vad/ten_vad.wasm")
    .then(readBody)
    .then((bytes) => WebAssembly.compile(bytes))
    .catch(forgetAndRethrow);

  tenVadModulePromise = pending;
  return pending;
}
/**
 * LiveKit audio track processor that gates the microphone with TEN-VAD inside
 * an AudioWorklet.
 *
 * The compiled TEN-VAD module is obtained from `getTenVADModule()` and handed
 * to the worklet through `processorOptions`; when it cannot be loaded the
 * worklet is still created, just without a module.
 *
 * Audio graph: sourceNode → workletNode → destinationNode
 * processedTrack is destinationNode.stream.getAudioTracks()[0]
 */
export class NoiseGateTransformer implements AudioTrackProcessor {
  public readonly name = "noise-gate";
  public processedTrack?: MediaStreamTrack;

  private workletNode?: AudioWorkletNode;
  private sourceNode?: MediaStreamAudioSourceNode;
  private destinationNode?: MediaStreamAudioDestinationNode;
  private params: NoiseGateParams;

  /** @param params - Initial parameters; copied, and sent to the worklet once it exists. */
  public constructor(params: NoiseGateParams) {
    this.params = { ...params };
  }

  /** Load the worklet, build the audio graph and expose `processedTrack`. */
  public async init(opts: AudioProcessorOptions): Promise<void> {
    const ctx = opts.audioContext;

    log.info(
      "init() called, audioContext state:",
      ctx.state,
      "params:",
      this.params,
    );

    const tenVadModule = await this.loadVadModule();

    log.info("loading worklet from:", compiledWorkletUrl);
    await ctx.audioWorklet.addModule(compiledWorkletUrl);
    log.info("worklet module loaded");

    const worklet = new AudioWorkletNode(ctx, "noise-gate-processor", {
      processorOptions: { tenVadModule },
    });
    this.workletNode = worklet;
    // Forward the worklet's log messages to the main-thread logger.
    worklet.port.onmessage = (
      e: MessageEvent<{ type: string; msg: string }>,
    ): void => {
      if (e.data?.type === "log") log.debug(e.data.msg);
    };
    this.sendParams();

    const source = ctx.createMediaStreamSource(new MediaStream([opts.track]));
    this.sourceNode = source;
    const destination = ctx.createMediaStreamDestination();
    this.destinationNode = destination;

    source.connect(worklet);
    worklet.connect(destination);

    this.processedTrack = destination.stream.getAudioTracks()[0];
    log.info("graph wired, processedTrack:", this.processedTrack);
  }

  /** Tear the graph down and build it again with the given options. */
  public async restart(opts: AudioProcessorOptions): Promise<void> {
    await this.destroy();
    await this.init(opts);
  }

  /** Disconnect every node and drop all references, including `processedTrack`. */
  // eslint-disable-next-line @typescript-eslint/require-await
  public async destroy(): Promise<void> {
    for (const node of [
      this.sourceNode,
      this.workletNode,
      this.destinationNode,
    ]) {
      node?.disconnect();
    }
    this.sourceNode = undefined;
    this.workletNode = undefined;
    this.destinationNode = undefined;
    this.processedTrack = undefined;
  }

  /** Push updated gate/VAD parameters to the running worklet. */
  public updateParams(params: NoiseGateParams): void {
    this.params = { ...params };
    this.sendParams();
  }

  /** Resolve the cached TEN-VAD module, or `undefined` (with a warning) if unavailable. */
  private async loadVadModule(): Promise<WebAssembly.Module | undefined> {
    try {
      const compiled = await getTenVADModule();
      log.info("TEN-VAD WASM module compiled");
      return compiled;
    } catch (e) {
      log.warn("TEN-VAD WASM module unavailable — VAD disabled:", e);
      return undefined;
    }
  }

  /** Post the current parameters to the worklet; a no-op before `init()`. */
  private sendParams(): void {
    const node = this.workletNode;
    if (node === undefined) return;
    log.debug("sendParams:", this.params);
    node.port.postMessage(this.params);
  }
}
.volumeSlider > p { color: var(--cpd-color-text-secondary); } + +.noiseGateSection { + margin-block-start: var(--cpd-space-6x); +} + +.noiseGateHeading { + color: var(--cpd-color-text-secondary); + margin-block: var(--cpd-space-3x) 0; +} + +.thresholdSlider { + margin-block-start: calc(-32px + var(--cpd-space-2x)); +} + +.noiseGateSeparator { + margin-block: 6px var(--cpd-space-4x); +} + +.advancedGate { + margin-top: var(--cpd-space-3x); +} + +.advancedGateToggle { + all: unset; + cursor: pointer; + font: var(--cpd-font-body-sm-semibold); + color: var(--cpd-color-text-secondary); + user-select: none; +} + +.advancedGateToggle:hover { + color: var(--cpd-color-text-primary); +} + +.restoreDefaults { + margin-top: var(--cpd-space-6x); +} diff --git a/src/settings/SettingsModal.tsx b/src/settings/SettingsModal.tsx index 30ac3618..8a77cee7 100644 --- a/src/settings/SettingsModal.tsx +++ b/src/settings/SettingsModal.tsx @@ -5,10 +5,19 @@ SPDX-License-Identifier: AGPL-3.0-only OR LicenseRef-Element-Commercial Please see LICENSE in the repository root for full details. 
*/ -import { type FC, type ReactNode, useEffect, useState } from "react"; +import { type FC, type ReactNode, useEffect, useId, useState } from "react"; import { useTranslation } from "react-i18next"; import { type MatrixClient } from "matrix-js-sdk"; -import { Button, Root as Form, Separator } from "@vector-im/compound-web"; +import { + Button, + Heading, + HelpMessage, + InlineField, + Label, + RadioControl, + Root as Form, + Separator, +} from "@vector-im/compound-web"; import { type Room as LivekitRoom } from "livekit-client"; import { Modal } from "../Modal"; @@ -24,6 +33,13 @@ import { soundEffectVolume as soundEffectVolumeSetting, backgroundBlur as backgroundBlurSetting, developerMode, + vadEnabled as vadEnabledSetting, + vadPositiveThreshold as vadPositiveThresholdSetting, + vadMode as vadModeSetting, + vadAdvancedEnabled as vadAdvancedEnabledSetting, + vadAdvancedOpenThreshold as vadAdvancedOpenThresholdSetting, + vadAdvancedCloseThreshold as vadAdvancedCloseThresholdSetting, + vadHoldTime as vadHoldTimeSetting, } from "./settings"; import { PreferencesSettingsTab } from "./PreferencesSettingsTab"; import { Slider } from "../Slider"; @@ -107,6 +123,26 @@ export const SettingsModal: FC = ({ const [soundVolumeRaw, setSoundVolumeRaw] = useState(soundVolume); const [showDeveloperSettingsTab] = useSetting(developerMode); + // Voice activity detection + const vadStateGroup = useId(); + const vadModeRadioGroup = useId(); + const [vadActive, setVadActive] = useSetting(vadEnabledSetting); + const [vadSensitivity, setVadSensitivity] = useSetting(vadPositiveThresholdSetting); + const [vadSensitivityRaw, setVadSensitivityRaw] = useState(vadSensitivity); + const [vadAdvanced, setVadAdvanced] = useSetting(vadAdvancedEnabledSetting); + const vadState = !vadActive ? "disabled" : vadAdvanced ? 
"advanced" : "simple"; + const setVadState = (s: "disabled" | "simple" | "advanced"): void => { + setVadActive(s !== "disabled"); + setVadAdvanced(s === "advanced"); + }; + const [vadModeValue, setVadModeValue] = useSetting(vadModeSetting); + const [vadAdvOpen, setVadAdvOpen] = useSetting(vadAdvancedOpenThresholdSetting); + const [vadAdvOpenRaw, setVadAdvOpenRaw] = useState(vadAdvOpen); + const [vadAdvClose, setVadAdvClose] = useSetting(vadAdvancedCloseThresholdSetting); + const [vadAdvCloseRaw, setVadAdvCloseRaw] = useState(vadAdvClose); + const [vadHold, setVadHold] = useSetting(vadHoldTimeSetting); + const [vadHoldRaw, setVadHoldRaw] = useState(vadHold); + const { available: isRageshakeAvailable } = useSubmitRageshake(); // For controlled devices, we will not show the input section: @@ -165,6 +201,189 @@ export const SettingsModal: FC = ({ /> +
+ + Voice Activity Detection + + +
+ setVadState("disabled")} + /> + } + > + + + setVadState("simple")} + /> + } + > + + + setVadState("advanced")} + /> + } + > + + +
+ {vadState !== "disabled" && ( + <> + {vadState === "simple" && ( +
+ + Sensitivity: {Math.round(vadSensitivityRaw * 100)}% + +

Higher values require more confident speech detection before opening.

+ +
+ )} + {vadState === "advanced" && ( + <> + Ramp profiles +
+ setVadModeValue("loose")} + /> + } + > + + 256 samples / 16 ms — 12 ms open / 32 ms close ramp. + + setVadModeValue("standard")} + /> + } + > + + 256 samples / 16 ms — 5 ms open / 20 ms close ramp. + + setVadModeValue("aggressive")} + /> + } + > + + 160 samples / 10 ms — 1 ms open / 5 ms close ramp. + +
+
+ + Open threshold: {Math.round(vadAdvOpenRaw * 100)}% + +

Minimum confidence required to open the gate.

+ +
+
+ + Close threshold: {Math.round(vadAdvCloseRaw * 100)}% + +

Probability must drop below this to start the hold/close sequence.

+ +
+
+ + Hold time: {vadHoldRaw} ms + +

How long to keep the gate open after speech drops below the close threshold.

+ +
+
+ +
+ + )} + + )} +
), }; diff --git a/src/settings/settings.ts b/src/settings/settings.ts index 917c79f1..1cf3a565 100644 --- a/src/settings/settings.ts +++ b/src/settings/settings.ts @@ -129,6 +129,17 @@ export const alwaysShowIphoneEarpiece = new Setting( false, ); +export const vadEnabled = new Setting("vad-enabled", false); +// Simple mode: single sensitivity slider (open threshold); close = open - 0.1 +export const vadPositiveThreshold = new Setting("vad-positive-threshold", 0.7); +// standard: 5ms/20ms aggressive: 1ms/5ms loose: 12ms/32ms +export const vadMode = new Setting<"standard" | "aggressive" | "loose">("vad-mode", "standard"); +// Advanced settings (override simple mode when enabled) +export const vadAdvancedEnabled = new Setting("vad-advanced-enabled", false); +export const vadAdvancedOpenThreshold = new Setting("vad-advanced-open-threshold", 0.7); +export const vadAdvancedCloseThreshold = new Setting("vad-advanced-close-threshold", 0.6); +export const vadHoldTime = new Setting("vad-hold-time", 300); + export enum MatrixRTCMode { Legacy = "legacy", Compatibility = "compatibility", diff --git a/src/state/CallViewModel/localMember/Publisher.ts b/src/state/CallViewModel/localMember/Publisher.ts index b7841c49..00b85039 100644 --- a/src/state/CallViewModel/localMember/Publisher.ts +++ b/src/state/CallViewModel/localMember/Publisher.ts @@ -7,6 +7,7 @@ Please see LICENSE in the repository root for full details. 
/**
 * Keep a TEN-VAD noise gate attached to the local microphone track while the
 * `vadEnabled` setting is on, and push setting changes to the live worklet.
 *
 * Each attachment owns its own AudioContext and transformer. It is released
 * (processor stopped, then context closed) when VAD is switched off, when the
 * microphone track changes or disappears, when attaching fails, and when the
 * scope-bound stream completes.
 *
 * NOTE(review): relies on LiveKit's `setProcessor`/`stopProcessor` serialising
 * overlapping calls on the same track — confirm against the livekit-client
 * version in use.
 */
private applyNoiseGate(scope: ObservableScope, room: LivekitRoom): void {
  // Observe the local microphone track
  const audioTrack$ = scope.behavior(
    observeTrackReference$(
      room.localParticipant,
      Track.Source.Microphone,
    ).pipe(
      map((ref) => {
        const track = ref?.publication.track;
        return track instanceof LocalAudioTrack ? track : null;
      }),
    ),
    null,
  );

  type Attachment = {
    readonly track: LocalAudioTrack;
    readonly transformer: NoiseGateTransformer;
    readonly ctx: AudioContext;
  };
  // The attachment we currently want alive (set as soon as attaching starts).
  let attachment: Attachment | null = null;
  // Every transformer created here, so a processor installed by someone else
  // can be told apart from one of ours that is still being torn down.
  const ownTransformers = new WeakSet<object>();

  const currentParams = (): NoiseGateParams => {
    const isAdvanced = vadAdvancedEnabled.getValue();
    if (isAdvanced) {
      return {
        vadEnabled: vadEnabled.getValue(),
        vadPositiveThreshold: vadAdvancedOpenThreshold.getValue(),
        vadNegativeThreshold: vadAdvancedCloseThreshold.getValue(),
        vadMode: vadMode.getValue(),
        holdMs: vadHoldTime.getValue(),
      };
    }
    // Simple mode: one sensitivity value; the close threshold sits 0.1 below it.
    const openT = vadPositiveThreshold.getValue();
    return {
      vadEnabled: vadEnabled.getValue(),
      vadPositiveThreshold: openT,
      vadNegativeThreshold: Math.max(0, openT - 0.1),
      vadMode: "standard",
      holdMs: 0,
    };
  };

  // Stop the processor (only if it is still ours), then close its context.
  // Idempotent, so it may be called again for an attachment already released.
  const release = (a: Attachment): void => {
    const run = async (): Promise<void> => {
      try {
        if ((a.track.getProcessor() as unknown) === a.transformer) {
          await a.track.stopProcessor();
        }
      } catch (e) {
        this.logger.warn("[NoiseGate] stopProcessor failed", e);
      }
      // Do not clobber the context of a newer attachment on the same track.
      if (attachment?.track !== a.track) {
        // eslint-disable-next-line @typescript-eslint/no-explicit-any
        (a.track as any).setAudioContext(undefined);
      }
      if (a.ctx.state !== "closed") await a.ctx.close();
    };
    run().catch((e: unknown) => {
      this.logger.warn("[NoiseGate] cleanup failed", e);
    });
  };

  const attach = (track: LocalAudioTrack): void => {
    const params = currentParams();
    this.logger.info("[NoiseGate] attaching processor, params:", params);
    const a: Attachment = {
      track,
      transformer: new NoiseGateTransformer(params),
      ctx: new AudioContext(),
    };
    ownTransformers.add(a.transformer);
    attachment = a;
    this.logger.info(
      "[NoiseGate] AudioContext state before resume:",
      a.ctx.state,
    );
    // eslint-disable-next-line @typescript-eslint/no-explicit-any
    (track as any).setAudioContext(a.ctx);

    const run = async (): Promise<void> => {
      await a.ctx.resume();
      this.logger.info(
        "[NoiseGate] AudioContext state after resume:",
        a.ctx.state,
      );
      // Detached or replaced while waiting: release() has already been called.
      if (attachment !== a) return;
      // eslint-disable-next-line @typescript-eslint/no-explicit-any
      await track.setProcessor(a.transformer as any);
      if (attachment !== a) {
        // Superseded while the processor was initialising: undo what we just set.
        release(a);
        return;
      }
      this.logger.info("[NoiseGate] setProcessor resolved");
    };
    run().catch((e: unknown) => {
      this.logger.error("[NoiseGate] setProcessor failed", e);
      if (attachment === a) attachment = null;
      release(a);
    });
  };

  // Attach / detach processor when VAD is toggled or the track changes.
  combineLatest([audioTrack$, vadEnabled.value$])
    .pipe(scope.bind())
    .subscribe({
      next: ([audioTrack, vadActive]) => {
        const wanted = vadActive ? audioTrack : null;

        if (attachment !== null && attachment.track !== wanted) {
          this.logger.info("[NoiseGate] removing processor");
          const old = attachment;
          attachment = null;
          release(old);
        }

        if (wanted !== null && attachment === null) {
          const existing = wanted.getProcessor();
          if (existing && !ownTransformers.has(existing)) {
            // Another feature owns the processor slot; leave it alone.
            this.logger.info(
              "[NoiseGate] track already has a processor — not attaching",
            );
            return;
          }
          attach(wanted);
        }
      },
      complete: () => {
        // Stream is over: do not leave a processor or AudioContext behind.
        if (attachment !== null) {
          const old = attachment;
          attachment = null;
          release(old);
        }
      },
    });

  // Push VAD param changes to the live worklet.
  combineLatest([
    vadEnabled.value$,
    vadPositiveThreshold.value$,
    vadMode.value$,
    vadAdvancedEnabled.value$,
    vadAdvancedOpenThreshold.value$,
    vadAdvancedCloseThreshold.value$,
    vadHoldTime.value$,
  ])
    .pipe(scope.bind())
    .subscribe(() => {
      attachment?.transformer.updateParams(currentParams());
    });
}