feat: adding TEN-VAD wasm to project and configuration options on audio
Some checks failed
Build / build_full_element_call (pull_request) Has been cancelled
Build / build_embedded_element_call (pull_request) Has been cancelled
Build / build_sdk_element_call (pull_request) Has been cancelled
Lint, format & type check / Lint, format & type check (pull_request) Has been cancelled
Build & publish embedded packages for releases / Versioning (pull_request) Has been cancelled
Test / Run unit tests (pull_request) Has been cancelled
Test / Run end-to-end tests (pull_request) Has been cancelled
GitHub Actions Security Analysis with zizmor 🌈 / Run zizmor 🌈 (pull_request) Has been cancelled
Prevent blocked / Prevent blocked (pull_request_target) Has been cancelled
PR changelog label / pr-changelog-label (pull_request_target) Has been cancelled
Build / deploy_develop (pull_request) Has been cancelled
Build / docker_for_develop (pull_request) Has been cancelled
Build & publish embedded packages for releases / build_element_call (pull_request) Has been cancelled
Build & publish embedded packages for releases / Publish tarball (pull_request) Has been cancelled
Build & publish embedded packages for releases / Publish NPM (pull_request) Has been cancelled
Build & publish embedded packages for releases / Publish Android AAR (pull_request) Has been cancelled
Build & publish embedded packages for releases / Publish SwiftPM Library (pull_request) Has been cancelled
Build & publish embedded packages for releases / Update release notes (pull_request) Has been cancelled
Some checks failed
Build / build_full_element_call (pull_request) Has been cancelled
Build / build_embedded_element_call (pull_request) Has been cancelled
Build / build_sdk_element_call (pull_request) Has been cancelled
Lint, format & type check / Lint, format & type check (pull_request) Has been cancelled
Build & publish embedded packages for releases / Versioning (pull_request) Has been cancelled
Test / Run unit tests (pull_request) Has been cancelled
Test / Run end-to-end tests (pull_request) Has been cancelled
GitHub Actions Security Analysis with zizmor 🌈 / Run zizmor 🌈 (pull_request) Has been cancelled
Prevent blocked / Prevent blocked (pull_request_target) Has been cancelled
PR changelog label / pr-changelog-label (pull_request_target) Has been cancelled
Build / deploy_develop (pull_request) Has been cancelled
Build / docker_for_develop (pull_request) Has been cancelled
Build & publish embedded packages for releases / build_element_call (pull_request) Has been cancelled
Build & publish embedded packages for releases / Publish tarball (pull_request) Has been cancelled
Build & publish embedded packages for releases / Publish NPM (pull_request) Has been cancelled
Build & publish embedded packages for releases / Publish Android AAR (pull_request) Has been cancelled
Build & publish embedded packages for releases / Publish SwiftPM Library (pull_request) Has been cancelled
Build & publish embedded packages for releases / Update release notes (pull_request) Has been cancelled
settings
This commit is contained in:
391
src/livekit/NoiseGateProcessor.worklet.ts
Normal file
391
src/livekit/NoiseGateProcessor.worklet.ts
Normal file
@@ -0,0 +1,391 @@
|
||||
/*
|
||||
Copyright 2026 New Vector Ltd.
|
||||
|
||||
SPDX-License-Identifier: AGPL-3.0-only OR LicenseRef-Element-Commercial
|
||||
Please see LICENSE in the repository root for full details.
|
||||
*/
|
||||
|
||||
/**
 * Sample rate, in Hz, of the audio context this worklet runs in.
 * Declared here because it is read as a bare global throughout this file.
 */
declare const sampleRate: number;

/**
 * Ambient typing for the worklet-scope base class. Only the members this
 * file actually touches are declared.
 */
declare class AudioWorkletProcessor {
  /** Message channel shared with the node that owns this processor. */
  public readonly port: MessagePort;

  /** `processorOptions` carries arbitrary data supplied when the node is created. */
  public constructor(options?: { processorOptions?: Record<string, unknown> });

  /**
   * Render callback: one call per audio block.
   * `inputs` / `outputs` are indexed as [port][channel][sample].
   */
  public process(
    inputs: Float32Array[][],
    outputs: Float32Array[][],
    parameters: Record<string, Float32Array>,
  ): boolean;
}

/** Associates a processor class with the name used to create its node. */
declare function registerProcessor(
  name: string,
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  processorCtor: new (...args: any[]) => AudioWorkletProcessor,
): void;
|
||||
|
||||
/** TEN-VAD gate settings, as received over the worklet's message port. */
interface NoiseGateParams {
  /** Master switch for the VAD gate. */
  vadEnabled: boolean;
  /** Open the gate when the speech probability is >= this value (0–1). */
  vadPositiveThreshold: number;
  /** Close the gate when the speech probability is < this value (0–1). */
  vadNegativeThreshold: number;
  /** Preset that selects the ramp times and the VAD hop size. */
  vadMode: "standard" | "aggressive" | "loose";
  /** Hold time before the gate closes, in milliseconds; 0 = no hold. */
  holdMs: number;
}
|
||||
|
||||
/** Port message that sets the gate state directly, bypassing the in-worklet VAD. */
interface VADGateMessage {
  /** Discriminator that distinguishes this message from a params update. */
  type: "vad-gate";
  /** Desired gate state: `true` passes audio, `false` ramps it to silence. */
  open: boolean;
}
|
||||
|
||||
/**
 * Thin synchronous wrapper around the TEN-VAD Emscripten WASM module.
 *
 * Instantiated synchronously in the AudioWorklet constructor from a
 * pre-compiled WebAssembly.Module passed via processorOptions. The wrapper
 * owns three small buffers inside the WASM heap (audio input, probability
 * output, flag output) plus the native VAD handle, and releases all of them
 * in {@link destroy}.
 */
class TenVADRuntime {
  private readonly mem: WebAssembly.Memory;
  private readonly freeFn: (ptr: number) => void;
  private readonly processFn: (
    handle: number,
    audioPtr: number,
    hopSize: number,
    probPtr: number,
    flagPtr: number,
  ) => number;
  private readonly destroyFn: (handle: number) => number;
  private readonly handle: number;
  private readonly audioBufPtr: number;
  private readonly probPtr: number;
  private readonly flagPtr: number;
  public readonly hopSize: number;

  /**
   * @param module - Pre-compiled TEN-VAD module.
   * @param hopSize - Number of Int16 samples consumed per `process()` call.
   * @param threshold - Threshold forwarded to `ten_vad_create`.
   * @throws Error if an allocation inside the WASM heap fails or
   *   `ten_vad_create` returns a non-zero status.
   */
  public constructor(
    module: WebAssembly.Module,
    hopSize: number,
    threshold: number,
  ) {
    this.hopSize = hopSize;

    // Late-bound memory reference — emscripten_resize_heap and memmove
    // are only called after instantiation, so closing over this is safe.
    const state = { mem: null as WebAssembly.Memory | null };

    // Import names are minified by Emscripten; the role noted on each entry
    // is the one recorded by the original author.
    const imports = {
      a: {
        // abort
        a: (): never => {
          throw new Error("ten_vad abort");
        },
        // fd_write / proc_exit stub
        b: (): number => 0,
        // emscripten_resize_heap
        c: (reqBytes: number): number => {
          if (!state.mem) return 0;
          try {
            // The size arrives as an i32; reinterpret it as unsigned so that
            // requests above 2 GiB are not seen as negative numbers.
            const wanted = reqBytes >>> 0;
            const cur = state.mem.buffer.byteLength;
            if (cur >= wanted) return 1;
            state.mem.grow(Math.ceil((wanted - cur) / 65536));
            return 1;
          } catch {
            // grow() throws when the maximum is exceeded; report failure to C.
            return 0;
          }
        },
        // fd_write stub
        d: (): number => 0,
        // environ stub
        e: (): number => 0,
        // memmove
        f: (dest: number, src: number, len: number): void => {
          if (state.mem) {
            new Uint8Array(state.mem.buffer).copyWithin(dest, src, src + len);
          }
        },
      },
    };

    // Synchronous instantiation — valid in Worker/AudioWorklet global scope
    // eslint-disable-next-line @typescript-eslint/no-explicit-any
    const instance = new WebAssembly.Instance(module, imports as any);
    const asm = instance.exports as {
      g: WebAssembly.Memory; // exported memory
      h: () => void; // __wasm_call_ctors
      i: (n: number) => number; // malloc
      j: (p: number) => void; // free
      k: (handlePtr: number, hopSize: number, threshold: number) => number; // ten_vad_create
      l: (
        handle: number,
        audioPtr: number,
        hopSize: number,
        probPtr: number,
        flagPtr: number,
      ) => number; // ten_vad_process
      m: (handle: number) => number; // ten_vad_destroy
    };

    state.mem = asm.g;
    this.mem = asm.g;
    this.freeFn = asm.j;
    this.processFn = asm.l;
    this.destroyFn = asm.m;

    // Run Emscripten static constructors
    asm.h();

    // malloc wrapper that turns a null pointer into an exception instead of
    // letting later writes land at address 0.
    const alloc = (bytes: number): number => {
      const ptr = asm.i(bytes);
      if (ptr === 0) throw new Error(`ten_vad malloc(${bytes}) failed`);
      return ptr;
    };

    // Allocate persistent buffers (malloc is 8-byte aligned, so alignment is fine)
    this.audioBufPtr = alloc(hopSize * 2); // Int16Array
    this.probPtr = alloc(4); // float
    this.flagPtr = alloc(4); // int

    // Create VAD handle — ten_vad_create(void** handle, int hopSize, float threshold)
    const handlePtrPtr = alloc(4);
    const ret = asm.k(handlePtrPtr, hopSize, threshold);
    if (ret !== 0) {
      asm.j(handlePtrPtr);
      throw new Error(`ten_vad_create failed: ${ret}`);
    }
    // Views are re-created on every access because memory growth replaces
    // the underlying ArrayBuffer.
    this.handle = new Int32Array(this.mem.buffer)[handlePtrPtr >> 2] ?? 0;
    asm.j(handlePtrPtr);
  }

  /**
   * Process one hop of Int16 audio. Returns speech probability [0–1].
   *
   * At most `hopSize` samples are copied into the WASM input buffer: that
   * buffer is allocated for exactly `hopSize` samples, so copying a longer
   * array would write past the allocation and corrupt the WASM heap.
   *
   * @param samples - Int16 PCM; only the first `hopSize` samples are used.
   */
  public process(samples: Int16Array): number {
    const hop =
      samples.length > this.hopSize
        ? samples.subarray(0, this.hopSize)
        : samples;
    new Int16Array(this.mem.buffer).set(hop, this.audioBufPtr >> 1);
    this.processFn(
      this.handle,
      this.audioBufPtr,
      this.hopSize,
      this.probPtr,
      this.flagPtr,
    );
    return new Float32Array(this.mem.buffer)[this.probPtr >> 2] ?? 0;
  }

  /** Release the native VAD handle and the buffers allocated in the constructor. */
  public destroy(): void {
    this.destroyFn(this.handle);
    this.freeFn(this.audioBufPtr);
    this.freeFn(this.probPtr);
    this.freeFn(this.flagPtr);
  }
}
|
||||
|
||||
/**
 * AudioWorkletProcessor implementing an in-worklet TEN-VAD gate whose gain
 * is updated per-sample.
 *
 * TEN-VAD gate: accumulates channel 0 with decRatio:1 averaging decimation
 * (48 kHz → 16 kHz), runs the TEN-VAD model synchronously once per hop
 * (256 samples = 16 ms, or 160 samples = 10 ms in aggressive mode), and
 * controls vadGateOpen with hysteresis and an optional hold time.
 * No IPC round-trip required.
 *
 * Asymmetric ramp (standard mode): 5 ms open (minimise speech onset
 * masking), 20 ms close (de-click on silence).
 *
 * The gate fails open: whenever the VAD is disabled or the WASM runtime is
 * unavailable, audio passes through unattenuated instead of staying muted.
 */
class NoiseGateProcessor extends AudioWorkletProcessor {
  // VAD gate state
  private vadGateOpen = true; // starts open; TEN-VAD closes it on first silent frame
  private vadAttenuation = 1.0; // current gain, ramped towards 0 or 1 each sample
  // Asymmetric ramp rates (gain change per sample) — recomputed in
  // updateParams based on vadMode
  private vadOpenRampRate = 1.0 / (0.005 * sampleRate); // default: 5 ms
  private vadCloseRampRate = 1.0 / (0.02 * sampleRate); // default: 20 ms

  // TEN-VAD state
  private vadEnabled = false;
  private vadPositiveThreshold = 0.5;
  private vadNegativeThreshold = 0.3;
  private holdMs = 0;
  private vadHoldHops = 0; // hold expressed in VAD hops
  private vadHoldCounter = 0; // hops of continuous sub-threshold signal while gate is open
  private tenVadRuntime: TenVADRuntime | null = null;
  private tenVadModule: WebAssembly.Module | undefined = undefined;
  // decRatio:1 decimation from AudioContext sample rate to (about) 16 kHz
  private readonly decRatio = Math.max(1, Math.round(sampleRate / 16000));
  private decPhase = 0;
  private decAcc = 0;
  // Buffer sized for max hop (256); vadHopSize tracks how many samples to collect
  private readonly vadHopBuf = new Int16Array(256);
  // View over the first vadHopSize samples of vadHopBuf. Passing this view
  // (not the full buffer) guarantees the runtime never receives more samples
  // than its WASM input buffer was allocated for.
  private vadHopView: Int16Array = this.vadHopBuf;
  private vadHopSize = 256; // standard: 256 (16 ms), aggressive: 160 (10 ms)
  private vadHopCount = 0;

  private logCounter = 0;

  /**
   * @param options - `processorOptions.tenVadModule` may carry a pre-compiled
   *   TEN-VAD WebAssembly.Module; without it the in-worklet VAD stays inactive.
   */
  public constructor(options?: {
    processorOptions?: Record<string, unknown>;
  }) {
    super(options);

    // Try to instantiate TEN-VAD from the pre-compiled module passed by the main thread
    this.tenVadModule = options?.processorOptions?.tenVadModule as
      | WebAssembly.Module
      | undefined;
    if (this.tenVadModule) {
      try {
        // Default: standard mode — 256 samples @ 16 kHz = 16 ms
        this.tenVadRuntime = new TenVADRuntime(this.tenVadModule, 256, 0.5);
        this.port.postMessage({
          type: "log",
          msg:
            "[NoiseGate worklet] TEN-VAD runtime initialized, decRatio=" +
            this.decRatio,
        });
      } catch (e) {
        this.port.postMessage({
          type: "log",
          msg: "[NoiseGate worklet] TEN-VAD init failed: " + String(e),
        });
      }
    }

    // Anything that is not a "vad-gate" message is treated as a params update.
    this.port.onmessage = (
      e: MessageEvent<NoiseGateParams | VADGateMessage>,
    ): void => {
      if ((e.data as VADGateMessage).type === "vad-gate") {
        this.vadGateOpen = (e.data as VADGateMessage).open;
      } else {
        this.updateParams(e.data as NoiseGateParams);
      }
    };

    this.updateParams({
      vadEnabled: false,
      vadPositiveThreshold: 0.5,
      vadNegativeThreshold: 0.3,
      vadMode: "standard",
      holdMs: 0,
    });

    this.port.postMessage({
      type: "log",
      msg: "[NoiseGate worklet] constructor called, sampleRate=" + sampleRate,
    });
  }

  /**
   * Apply a new parameter set: thresholds, ramp times, hop size (recreating
   * the TEN-VAD runtime when the hop size changes) and hold duration.
   * Missing fields fall back to their defaults.
   */
  private updateParams(p: NoiseGateParams): void {
    this.vadEnabled = p.vadEnabled ?? false;
    this.vadPositiveThreshold = p.vadPositiveThreshold ?? 0.5;
    this.vadNegativeThreshold = p.vadNegativeThreshold ?? 0.3;
    this.holdMs = p.holdMs ?? 0;

    const newMode = p.vadMode ?? "standard";
    if (newMode === "aggressive") {
      this.vadOpenRampRate = 1.0 / (0.001 * sampleRate); // 1 ms
      this.vadCloseRampRate = 1.0 / (0.005 * sampleRate); // 5 ms
    } else if (newMode === "loose") {
      this.vadOpenRampRate = 1.0 / (0.012 * sampleRate); // 12 ms
      this.vadCloseRampRate = 1.0 / (0.032 * sampleRate); // 32 ms
    } else {
      this.vadOpenRampRate = 1.0 / (0.005 * sampleRate); // 5 ms
      this.vadCloseRampRate = 1.0 / (0.02 * sampleRate); // 20 ms
    }

    // Hop size: aggressive=160 (10 ms @ 16 kHz), others=256 (16 ms)
    const newHopSize = newMode === "aggressive" ? 160 : 256;
    if (newHopSize !== this.vadHopSize && this.tenVadModule) {
      // The hop size is fixed when the native VAD is created, so a change
      // requires a fresh runtime.
      this.tenVadRuntime?.destroy();
      this.tenVadRuntime = null;
      this.vadHopCount = 0;
      try {
        this.tenVadRuntime = new TenVADRuntime(
          this.tenVadModule,
          newHopSize,
          0.5,
        );
      } catch (e) {
        this.port.postMessage({
          type: "log",
          msg: "[NoiseGate worklet] TEN-VAD recreate failed: " + String(e),
        });
      }
    }
    this.vadHopSize = newHopSize;
    this.vadHopView = this.vadHopBuf.subarray(0, newHopSize);

    // Recompute hold in hops: ceil((holdMs / 1000) * 16000 / vadHopSize)
    this.vadHoldHops =
      this.holdMs > 0
        ? Math.ceil(((this.holdMs / 1000) * 16000) / this.vadHopSize)
        : 0;
    this.vadHoldCounter = 0;

    // Fail open: with the VAD disabled, or with no runtime to drive the gate,
    // nothing in this processor would ever reopen a closed gate.
    if (!this.vadEnabled || this.tenVadRuntime === null) {
      this.vadGateOpen = true;
    }
    this.port.postMessage({
      type: "log",
      msg:
        "[NoiseGate worklet] params updated: vadEnabled=" +
        p.vadEnabled +
        " vadPos=" +
        p.vadPositiveThreshold +
        " vadNeg=" +
        p.vadNegativeThreshold +
        " vadMode=" +
        newMode +
        " holdMs=" +
        this.holdMs,
    });
  }

  /**
   * Run the VAD on the hop that has just been filled and update the gate
   * with hysteresis and hold. If the WASM call throws, the runtime is
   * dropped and the gate is opened so the audio thread keeps running.
   */
  private runVadHop(runtime: TenVADRuntime): void {
    let prob: number;
    try {
      prob = runtime.process(this.vadHopView);
    } catch (e) {
      // An exception escaping process() would stop this processor for good;
      // disable the VAD and pass audio through instead.
      this.tenVadRuntime = null;
      this.vadGateOpen = true;
      this.vadHoldCounter = 0;
      this.port.postMessage({
        type: "log",
        msg:
          "[NoiseGate worklet] TEN-VAD process failed, VAD disabled: " +
          String(e),
      });
      return;
    }

    if (prob >= this.vadPositiveThreshold) {
      // Speech detected — open gate, reset hold counter
      this.vadGateOpen = true;
      this.vadHoldCounter = 0;
    } else if (prob < this.vadNegativeThreshold) {
      if (this.vadGateOpen) {
        if (this.vadHoldHops === 0) {
          this.vadGateOpen = false;
        } else {
          this.vadHoldCounter++;
          if (this.vadHoldCounter >= this.vadHoldHops) {
            this.vadGateOpen = false;
            this.vadHoldCounter = 0;
          }
        }
      }
    } else {
      // Ambiguous zone — reset hold counter so hold only fires on sustained silence
      this.vadHoldCounter = 0;
    }
  }

  /**
   * Render callback. Feeds channel 0 to the VAD, ramps the gate gain one
   * step per sample and writes the gated signal to every output channel.
   *
   * @returns Always `true`, to keep the processor alive.
   */
  public process(inputs: Float32Array[][], outputs: Float32Array[][]): boolean {
    const input = inputs[0];
    const output = outputs[0];
    if (!input || input.length === 0 || !output) return true;

    const blockSize = input[0]?.length ?? 128;

    for (let i = 0; i < blockSize; i++) {
      // --- TEN-VAD in-worklet processing ---
      // Accumulate raw mono samples with decRatio:1 decimation (48 kHz → 16 kHz).
      // Every vadHopSize output samples run the WASM VAD and update vadGateOpen.
      if (this.vadEnabled && this.tenVadRuntime !== null) {
        this.decAcc += input[0]?.[i] ?? 0;
        this.decPhase++;
        if (this.decPhase >= this.decRatio) {
          this.decPhase = 0;
          const avg = this.decAcc / this.decRatio;
          this.decAcc = 0;
          // Float32 [-1,1] → Int16 with clamping
          const s16 =
            avg >= 1.0
              ? 32767
              : avg <= -1.0
                ? -32768
                : (avg * 32767 + 0.5) | 0;
          this.vadHopBuf[this.vadHopCount++] = s16;

          if (this.vadHopCount >= this.vadHopSize) {
            this.vadHopCount = 0;
            this.runVadHop(this.tenVadRuntime);
          }
        }
      }

      // Asymmetric ramp: fast open to minimise speech onset masking,
      // slow close to de-click on silence transitions.
      const vadTarget = this.vadGateOpen ? 1.0 : 0.0;
      if (this.vadAttenuation < vadTarget) {
        this.vadAttenuation = Math.min(
          vadTarget,
          this.vadAttenuation + this.vadOpenRampRate,
        );
      } else if (this.vadAttenuation > vadTarget) {
        this.vadAttenuation = Math.max(
          vadTarget,
          this.vadAttenuation - this.vadCloseRampRate,
        );
      }

      const gain = this.vadAttenuation;

      for (let c = 0; c < output.length; c++) {
        // Output channels without a matching input channel reuse channel 0.
        const inCh = input[c] ?? input[0];
        const outCh = output[c];
        if (inCh && outCh) {
          outCh[i] = (inCh[i] ?? 0) * gain;
        }
      }
    }

    // Periodic state report: one message every 375 render calls.
    this.logCounter++;
    if (this.logCounter % 375 === 0) {
      this.port.postMessage({
        type: "log",
        msg:
          "[NoiseGate worklet] vadOpen=" +
          this.vadGateOpen +
          " vadAtten=" +
          this.vadAttenuation.toFixed(3),
      });
    }

    return true;
  }
}
|
||||
|
||||
registerProcessor("noise-gate-processor", NoiseGateProcessor);
|
||||
163
src/livekit/NoiseGateTransformer.ts
Normal file
163
src/livekit/NoiseGateTransformer.ts
Normal file
@@ -0,0 +1,163 @@
|
||||
/*
|
||||
Copyright 2026 New Vector Ltd.
|
||||
|
||||
SPDX-License-Identifier: AGPL-3.0-only OR LicenseRef-Element-Commercial
|
||||
Please see LICENSE in the repository root for full details.
|
||||
*/
|
||||
|
||||
import { type Track } from "livekit-client";
|
||||
import { logger } from "matrix-js-sdk/lib/logger";
|
||||
// ?worker&url tells Vite to compile the TypeScript worklet and return its URL.
|
||||
// Without this, Vite copies the .ts file verbatim and the browser rejects it.
|
||||
import compiledWorkletUrl from "./NoiseGateProcessor.worklet.ts?worker&url";
|
||||
|
||||
const log = logger.getChild("[NoiseGateTransformer]");
|
||||
|
||||
/**
 * TEN-VAD gate settings. They are posted to the AudioWorklet as-is and are
 * processed entirely inside it.
 */
export interface NoiseGateParams {
  /** Master switch for the VAD gate. */
  vadEnabled: boolean;
  /** Open the gate when the speech probability is >= this value (0–1). */
  vadPositiveThreshold: number;
  /**
   * Close the gate when the speech probability is < this value (0–1);
   * computed by Publisher.
   */
  vadNegativeThreshold: number;
  /** Preset selecting how quickly the gate reacts. */
  vadMode: "standard" | "aggressive" | "loose";
  /** Hold time before the gate closes, in milliseconds; 0 = no hold. */
  holdMs: number;
}
|
||||
|
||||
/**
 * Local mirror of LiveKit's AudioProcessorOptions. That API is experimental
 * and not publicly exported, so the shape is re-declared here from the
 * library's type definitions.
 */
interface AudioProcessorOptions {
  /** Always the audio track kind. */
  kind: Track.Kind.Audio;
  /** Source track to be processed. */
  track: MediaStreamTrack;
  /** Audio context in which the processing graph is built. */
  audioContext: AudioContext;
  /** Optional media element supplied by LiveKit. */
  element?: HTMLMediaElement;
}
|
||||
|
||||
/**
 * Local mirror of LiveKit's TrackProcessor<Track.Kind.Audio> interface.
 */
export interface AudioTrackProcessor {
  /** Identifier of the processor. */
  name: string;
  /** Output track of the processor, once it has been initialised. */
  processedTrack?: MediaStreamTrack;
  /** Build the processing pipeline for the given options. */
  init(opts: AudioProcessorOptions): Promise<void>;
  /** Rebuild the processing pipeline for the given options. */
  restart(opts: AudioProcessorOptions): Promise<void>;
  /** Tear the processing pipeline down. */
  destroy(): Promise<void>;
}
|
||||
|
||||
// In-flight or completed compilation of the TEN-VAD module. It is shared so
// the binary is fetched and compiled once and reused across processor restarts.
let tenVadModulePromise: Promise<WebAssembly.Module> | null = null;

/**
 * Fetch and compile the TEN-VAD WASM binary, memoising the resulting promise.
 *
 * A failed attempt clears the memoised promise before the rejection is
 * propagated, so the next call starts a fresh download.
 */
function getTenVADModule(): Promise<WebAssembly.Module> {
  if (tenVadModulePromise !== null) return tenVadModulePromise;

  // Step 1: turn a successful HTTP response into raw bytes.
  const readBody = (response: Response): Promise<ArrayBuffer> => {
    if (response.ok) return response.arrayBuffer();
    throw new Error(`Failed to fetch ten_vad.wasm: ${response.status}`);
  };

  // Step 2: on any failure, forget the cached promise so a retry is
  // possible on next attach, then re-raise.
  const forgetAndRethrow = (reason: unknown): never => {
    tenVadModulePromise = null;
    throw reason;
  };

  const pending = fetch("/vad/ten_vad.wasm")
    .then(readBody)
    .then((bytes) => WebAssembly.compile(bytes))
    .catch(forgetAndRethrow);

  tenVadModulePromise = pending;
  return pending;
}
|
||||
|
||||
/**
 * LiveKit audio track processor that gates the microphone with TEN-VAD
 * running in an AudioWorklet.
 *
 * The compiled TEN-VAD module is handed to the worklet through
 * processorOptions, so detection runs synchronously on the audio thread —
 * no IPC round-trip, ~16 ms VAD latency.
 *
 * Audio graph: sourceNode → workletNode → destinationNode
 * processedTrack is destinationNode.stream.getAudioTracks()[0]
 */
export class NoiseGateTransformer implements AudioTrackProcessor {
  public readonly name = "noise-gate";
  public processedTrack?: MediaStreamTrack;

  private workletNode?: AudioWorkletNode;
  private sourceNode?: MediaStreamAudioSourceNode;
  private destinationNode?: MediaStreamAudioDestinationNode;
  private params: NoiseGateParams;

  /** @param params - Initial gate settings; copied, not retained by reference. */
  public constructor(params: NoiseGateParams) {
    this.params = { ...params };
  }

  /**
   * Load the worklet, create the gate node, push the current settings to it
   * and wire source → gate → destination. A missing TEN-VAD module is
   * tolerated: the graph is still built, with the module left undefined.
   */
  public async init(opts: AudioProcessorOptions): Promise<void> {
    const ctx = opts.audioContext;
    const inputTrack = opts.track;

    log.info(
      "init() called, audioContext state:",
      ctx.state,
      "params:",
      this.params,
    );

    const vadModule = await this.loadVadModule();

    log.info("loading worklet from:", compiledWorkletUrl);
    await ctx.audioWorklet.addModule(compiledWorkletUrl);
    log.info("worklet module loaded");

    const gateNode = new AudioWorkletNode(ctx, "noise-gate-processor", {
      processorOptions: { tenVadModule: vadModule },
    });
    // Forward log lines emitted by the worklet to the main-thread logger.
    gateNode.port.onmessage = (
      e: MessageEvent<{ type: string; msg: string }>,
    ): void => {
      if (e.data?.type === "log") log.debug(e.data.msg);
    };
    this.workletNode = gateNode;
    this.sendParams();

    const source = ctx.createMediaStreamSource(new MediaStream([inputTrack]));
    this.sourceNode = source;
    const sink = ctx.createMediaStreamDestination();
    this.destinationNode = sink;

    source.connect(gateNode);
    gateNode.connect(sink);

    this.processedTrack = sink.stream.getAudioTracks()[0];
    log.info("graph wired, processedTrack:", this.processedTrack);
  }

  /** Tear the graph down and build it again with the new options. */
  public async restart(opts: AudioProcessorOptions): Promise<void> {
    await this.destroy();
    await this.init(opts);
  }

  /** Disconnect every node and drop all references to the graph. */
  // eslint-disable-next-line @typescript-eslint/require-await
  public async destroy(): Promise<void> {
    const nodes = [this.sourceNode, this.workletNode, this.destinationNode];
    for (const node of nodes) {
      node?.disconnect();
    }
    this.sourceNode = undefined;
    this.workletNode = undefined;
    this.destinationNode = undefined;
    this.processedTrack = undefined;
  }

  /** Push updated gate/VAD parameters to the running worklet. */
  public updateParams(params: NoiseGateParams): void {
    this.params = { ...params };
    this.sendParams();
  }

  /**
   * Fetch and compile the TEN-VAD WASM module (cached after first call).
   * Resolves to `undefined` instead of rejecting when it is unavailable.
   */
  private async loadVadModule(): Promise<WebAssembly.Module | undefined> {
    try {
      const compiled = await getTenVADModule();
      log.info("TEN-VAD WASM module compiled");
      return compiled;
    } catch (e) {
      log.warn("TEN-VAD WASM module unavailable — VAD disabled:", e);
      return undefined;
    }
  }

  /** Post the current settings to the worklet; a no-op before init(). */
  private sendParams(): void {
    const node = this.workletNode;
    if (node === undefined) return;
    log.debug("sendParams:", this.params);
    node.port.postMessage(this.params);
  }
}
|
||||
Reference in New Issue
Block a user