/*
Copyright 2026 New Vector Ltd.

SPDX-License-Identifier: AGPL-3.0-only OR LicenseRef-Element-Commercial
Please see LICENSE in the repository root for full details.
*/

import { type Track } from "livekit-client";
import { logger } from "matrix-js-sdk/lib/logger";

// ?worker&url tells Vite to compile the TypeScript worklet and return its URL.
// Without this, Vite copies the .ts file verbatim and the browser rejects it.
import compiledWorkletUrl from "./NoiseGateProcessor.worklet.ts?worker&url";

const log = logger.getChild("[NoiseGateTransformer]");

/**
 * Parameter set for the noise gate, transient suppressor and TEN-VAD gate.
 * The whole object is posted as-is to the AudioWorklet's message port.
 */
export interface NoiseGateParams {
  noiseGateActive: boolean;
  /** dBFS — gate opens above this, closes below it. */
  threshold: number;
  // Gate envelope timings. Units are presumably milliseconds (per the `Ms`
  // suffix); the exact semantics are defined by the worklet — verify there.
  attackMs: number;
  holdMs: number;
  releaseMs: number;
  transientEnabled: boolean;
  /** dB above background RMS that triggers suppression. */
  transientThresholdDb: number;
  /** ms for suppression to fade after transient ends. */
  transientReleaseMs: number;
  // TEN-VAD params — processed entirely inside the AudioWorklet
  vadEnabled: boolean;
  /** Open gate when isSpeech prob >= this (0–1). */
  vadPositiveThreshold: number;
  /** Close gate when isSpeech prob < this (0–1). */
  vadNegativeThreshold: number;
  /** true: 1 ms open / 5 ms close ramp; false: 5 ms / 20 ms. */
  vadAggressive: boolean;
}

/**
 * Matches LiveKit's AudioProcessorOptions (experimental API, not publicly
 * exported, so we declare it locally based on the type definitions).
 */
interface AudioProcessorOptions {
  kind: Track.Kind.Audio;
  track: MediaStreamTrack;
  audioContext: AudioContext;
  element?: HTMLMediaElement;
}

/**
 * Matches LiveKit's TrackProcessor interface.
 *
 * All lifecycle methods resolve with no value; callers only await completion.
 */
export interface AudioTrackProcessor {
  name: string;
  processedTrack?: MediaStreamTrack;
  init(opts: AudioProcessorOptions): Promise<void>;
  restart(opts: AudioProcessorOptions): Promise<void>;
  destroy(): Promise<void>;
}

// Cached compiled TEN-VAD module — compiled once, reused across processor restarts.
let tenVadModulePromise: Promise<WebAssembly.Module> | null = null;

/**
 * Fetch and compile the TEN-VAD WASM binary, memoising the in-flight/settled
 * promise so concurrent and later callers share a single fetch + compile.
 *
 * @returns A promise for the compiled module.
 * @throws (as a rejection) if the HTTP response is not OK, or if the fetch or
 *   compilation fails. On rejection the cache is cleared so the next call
 *   starts a fresh attempt.
 */
function getTenVADModule(): Promise<WebAssembly.Module> {
  if (!tenVadModulePromise) {
    tenVadModulePromise = fetch("/vad/ten_vad.wasm")
      .then((r) => {
        if (!r.ok) throw new Error(`Failed to fetch ten_vad.wasm: ${r.status}`);
        return r.arrayBuffer();
      })
      .then((buf) => WebAssembly.compile(buf))
      .catch((e: unknown) => {
        // Clear the cache so a retry is possible on next attach
        tenVadModulePromise = null;
        throw e;
      });
  }
  return tenVadModulePromise;
}

/**
 * LiveKit audio track processor that applies a noise gate, optional transient
 * suppressor, and optional TEN-VAD gate via AudioWorklet.
 *
 * The TEN-VAD WASM module is fetched once, compiled, and passed to the worklet
 * via processorOptions so it runs synchronously inside the audio thread —
 * no IPC round-trip, ~16 ms VAD latency.
 *
 * Audio graph: sourceNode → workletNode → destinationNode
 * processedTrack is destinationNode.stream.getAudioTracks()[0]
 */
export class NoiseGateTransformer implements AudioTrackProcessor {
  public readonly name = "noise-gate";
  public processedTrack?: MediaStreamTrack;

  private workletNode?: AudioWorkletNode;
  private sourceNode?: MediaStreamAudioSourceNode;
  private destinationNode?: MediaStreamAudioDestinationNode;
  private params: NoiseGateParams;

  /**
   * @param params Initial gate/VAD parameters. A shallow copy is stored, so
   *   later mutation of the caller's object does not affect this instance.
   */
  public constructor(params: NoiseGateParams) {
    this.params = { ...params };
  }

  /**
   * Build the audio graph for `opts.track` on `opts.audioContext` and expose
   * the gated output as `processedTrack`.
   *
   * A failure to obtain the TEN-VAD module is logged and tolerated: the
   * worklet is then constructed with `tenVadModule` undefined. Failures while
   * loading the worklet module or creating nodes propagate to the caller.
   */
  public async init(opts: AudioProcessorOptions): Promise<void> {
    const { track, audioContext } = opts;

    log.info("init() called, audioContext state:", audioContext.state, "params:", this.params);

    // Fetch and compile the TEN-VAD WASM module (cached after first call)
    let tenVadModule: WebAssembly.Module | undefined;
    try {
      tenVadModule = await getTenVADModule();
      log.info("TEN-VAD WASM module compiled");
    } catch (e) {
      log.warn("TEN-VAD WASM module unavailable — VAD disabled:", e);
    }

    log.info("loading worklet from:", compiledWorkletUrl);
    await audioContext.audioWorklet.addModule(compiledWorkletUrl);
    log.info("worklet module loaded");

    this.workletNode = new AudioWorkletNode(
      audioContext,
      "noise-gate-processor",
      {
        processorOptions: {
          tenVadModule,
        },
      },
    );

    // Forward "log" messages posted by the worklet to the debug log.
    this.workletNode.port.onmessage = (
      e: MessageEvent<{ type: string; msg: string }>,
    ): void => {
      if (e.data?.type === "log") log.debug(e.data.msg);
    };

    // Push the current parameters before any audio is routed through the node.
    this.sendParams();

    this.sourceNode = audioContext.createMediaStreamSource(
      new MediaStream([track]),
    );
    this.destinationNode = audioContext.createMediaStreamDestination();
    this.sourceNode.connect(this.workletNode);
    this.workletNode.connect(this.destinationNode);
    this.processedTrack = this.destinationNode.stream.getAudioTracks()[0];

    log.info("graph wired, processedTrack:", this.processedTrack);
  }

  /** Tear down the current graph, then rebuild it from `opts`. */
  public async restart(opts: AudioProcessorOptions): Promise<void> {
    await this.destroy();
    await this.init(opts);
  }

  /**
   * Disconnect every node of the graph and drop all references, including
   * `processedTrack`. Safe to call when nothing has been initialised.
   */
  // eslint-disable-next-line @typescript-eslint/require-await
  public async destroy(): Promise<void> {
    this.sourceNode?.disconnect();
    this.workletNode?.disconnect();
    this.destinationNode?.disconnect();
    this.sourceNode = undefined;
    this.workletNode = undefined;
    this.destinationNode = undefined;
    this.processedTrack = undefined;
  }

  /** Push updated gate/VAD parameters to the running worklet. */
  public updateParams(params: NoiseGateParams): void {
    this.params = { ...params };
    this.sendParams();
  }

  /**
   * Post the stored parameters to the worklet's message port. Does nothing
   * while no worklet node exists; `init()` sends them once the node is created.
   */
  private sendParams(): void {
    if (!this.workletNode) return;
    log.debug("sendParams:", this.params);
    this.workletNode.port.postMessage(this.params);
  }
}