Files
element-call-custom/src/livekit/NoiseGateTransformer.ts
mk ea44b04bb3 fix: use ?worker&url import so Vite compiles the worklet TypeScript
new URL('./file.ts', import.meta.url) copies the file verbatim — the
browser gets raw TypeScript and addModule() throws DOMException. Using
?worker&url tells Vite to bundle and compile the file, producing a .js
output that the browser can actually execute.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-03-24 22:03:25 -03:00

172 lines
5.7 KiB
TypeScript
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
/*
Copyright 2026 New Vector Ltd.
SPDX-License-Identifier: AGPL-3.0-only OR LicenseRef-Element-Commercial
Please see LICENSE in the repository root for full details.
*/
import { type Track } from "livekit-client";
import { logger } from "matrix-js-sdk/lib/logger";
// ?worker&url tells Vite to compile the TypeScript worklet and return its URL.
// Without this, Vite copies the .ts file verbatim and the browser rejects it.
import compiledWorkletUrl from "./NoiseGateProcessor.worklet.ts?worker&url";
const log = logger.getChild("[NoiseGateTransformer]");
export interface NoiseGateParams {
noiseGateActive: boolean;
threshold: number; // dBFS — gate opens above this, closes below it
attackMs: number;
holdMs: number;
releaseMs: number;
transientEnabled: boolean;
transientThresholdDb: number; // dB above background RMS that triggers suppression
transientReleaseMs: number; // ms for suppression to fade after transient ends
// TEN-VAD params — processed entirely inside the AudioWorklet
vadEnabled: boolean;
vadPositiveThreshold: number; // open gate when isSpeech prob >= this (01)
vadNegativeThreshold: number; // close gate when isSpeech prob < this (01)
vadAggressive: boolean; // true: 1 ms open / 5 ms close ramp; false: 5 ms / 20 ms
}
/**
* Matches LiveKit's AudioProcessorOptions (experimental API, not publicly
* exported, so we declare it locally based on the type definitions).
*/
interface AudioProcessorOptions {
kind: Track.Kind.Audio;
track: MediaStreamTrack;
audioContext: AudioContext;
element?: HTMLMediaElement;
}
/**
* Matches LiveKit's TrackProcessor<Track.Kind.Audio> interface.
*/
export interface AudioTrackProcessor {
name: string;
processedTrack?: MediaStreamTrack;
init(opts: AudioProcessorOptions): Promise<void>;
restart(opts: AudioProcessorOptions): Promise<void>;
destroy(): Promise<void>;
}
// Cached compiled TEN-VAD module — compiled once, reused across processor restarts.
let tenVadModulePromise: Promise<WebAssembly.Module> | null = null;
function getTenVADModule(): Promise<WebAssembly.Module> {
if (!tenVadModulePromise) {
tenVadModulePromise = fetch("/vad/ten_vad.wasm")
.then((r) => {
if (!r.ok) throw new Error(`Failed to fetch ten_vad.wasm: ${r.status}`);
return r.arrayBuffer();
})
.then((buf) => WebAssembly.compile(buf))
.catch((e) => {
// Clear the cache so a retry is possible on next attach
tenVadModulePromise = null;
throw e;
});
}
return tenVadModulePromise;
}
/**
* LiveKit audio track processor that applies a noise gate, optional transient
* suppressor, and optional TEN-VAD gate via AudioWorklet.
*
* The TEN-VAD WASM module is fetched once, compiled, and passed to the worklet
* via processorOptions so it runs synchronously inside the audio thread —
* no IPC round-trip, ~16 ms VAD latency.
*
* Audio graph: sourceNode → workletNode → destinationNode
* processedTrack is destinationNode.stream.getAudioTracks()[0]
*/
export class NoiseGateTransformer implements AudioTrackProcessor {
public readonly name = "noise-gate";
public processedTrack?: MediaStreamTrack;
private workletNode?: AudioWorkletNode;
private sourceNode?: MediaStreamAudioSourceNode;
private destinationNode?: MediaStreamAudioDestinationNode;
private params: NoiseGateParams;
public constructor(params: NoiseGateParams) {
this.params = { ...params };
}
public async init(opts: AudioProcessorOptions): Promise<void> {
const { track, audioContext } = opts;
log.info("init() called, audioContext state:", audioContext.state, "params:", this.params);
// Fetch and compile the TEN-VAD WASM module (cached after first call)
let tenVadModule: WebAssembly.Module | undefined;
try {
tenVadModule = await getTenVADModule();
log.info("TEN-VAD WASM module compiled");
} catch (e) {
log.warn("TEN-VAD WASM module unavailable — VAD disabled:", e);
}
log.info("loading worklet from:", compiledWorkletUrl);
await audioContext.audioWorklet.addModule(compiledWorkletUrl);
log.info("worklet module loaded");
this.workletNode = new AudioWorkletNode(
audioContext,
"noise-gate-processor",
{
processorOptions: {
tenVadModule,
},
},
);
this.workletNode.port.onmessage = (
e: MessageEvent<{ type: string; msg: string }>,
): void => {
if (e.data?.type === "log") log.debug(e.data.msg);
};
this.sendParams();
this.sourceNode = audioContext.createMediaStreamSource(
new MediaStream([track]),
);
this.destinationNode = audioContext.createMediaStreamDestination();
this.sourceNode.connect(this.workletNode);
this.workletNode.connect(this.destinationNode);
this.processedTrack = this.destinationNode.stream.getAudioTracks()[0];
log.info("graph wired, processedTrack:", this.processedTrack);
}
public async restart(opts: AudioProcessorOptions): Promise<void> {
await this.destroy();
await this.init(opts);
}
// eslint-disable-next-line @typescript-eslint/require-await
public async destroy(): Promise<void> {
this.sourceNode?.disconnect();
this.workletNode?.disconnect();
this.destinationNode?.disconnect();
this.sourceNode = undefined;
this.workletNode = undefined;
this.destinationNode = undefined;
this.processedTrack = undefined;
}
/** Push updated gate/VAD parameters to the running worklet. */
public updateParams(params: NoiseGateParams): void {
this.params = { ...params };
this.sendParams();
}
private sendParams(): void {
if (!this.workletNode) return;
log.debug("sendParams:", this.params);
this.workletNode.port.postMessage(this.params);
}
}