fix: feed VAD the raw mic track captured before setProcessor

After setProcessor resolves, track.mediaStreamTrack returns the processed
(noise-gated) track rather than the raw microphone track. The VAD was
therefore being fed gated silence: it closed immediately, and with both the
noise gate and the VAD gate closed the pipeline deadlocked. Capture the raw
MediaStreamTrack before calling setProcessor and pass that to SileroVADGate
instead.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
mk
2026-03-24 00:06:47 -03:00
parent aff09d0e49
commit 325094b54d

View File

@@ -440,6 +440,7 @@ export class Publisher {
let transformer: NoiseGateTransformer | null = null;
let audioCtx: AudioContext | null = null;
let vadGate: SileroVADGate | null = null;
let rawMicTrack: MediaStreamTrack | null = null;
const currentParams = (): NoiseGateParams => ({
threshold: noiseGateThreshold.getValue(),
@@ -460,14 +461,8 @@ export class Publisher {
transformer?.setVADOpen(true);
};
const startVAD = (track: LocalAudioTrack, ctx: AudioContext): void => {
const startVAD = (rawTrack: MediaStreamTrack, ctx: AudioContext): void => {
stopVAD();
// eslint-disable-next-line @typescript-eslint/no-explicit-any
const rawTrack: MediaStreamTrack | undefined = (track as any).mediaStreamTrack;
if (!rawTrack) {
this.logger.warn("[VAD] no underlying MediaStreamTrack — skipping VAD");
return;
}
const stream = new MediaStream([rawTrack]);
vadGate = new SileroVADGate(stream, ctx, {
positiveThreshold: vadPositiveThreshold.getValue(),
@@ -488,6 +483,9 @@ export class Publisher {
if (enabled && !audioTrack.getProcessor()) {
const params = currentParams();
this.logger.info("[NoiseGate] attaching processor, params:", params);
// Capture the raw mic track BEFORE setProcessor replaces it
// eslint-disable-next-line @typescript-eslint/no-explicit-any
rawMicTrack = (audioTrack as any).mediaStreamTrack ?? null;
transformer = new NoiseGateTransformer(params);
audioCtx = new AudioContext();
this.logger.info("[NoiseGate] AudioContext state before resume:", audioCtx.state);
@@ -500,7 +498,7 @@ export class Publisher {
.setProcessor(transformer as any);
}).then(() => {
this.logger.info("[NoiseGate] setProcessor resolved");
if (vadEnabled.getValue() && audioCtx) startVAD(audioTrack, audioCtx);
if (vadEnabled.getValue() && audioCtx && rawMicTrack) startVAD(rawMicTrack, audioCtx);
}).catch((e: unknown) => {
this.logger.error("[NoiseGate] setProcessor failed", e);
});
@@ -511,6 +509,7 @@ export class Publisher {
void audioCtx?.close();
audioCtx = null;
transformer = null;
rawMicTrack = null;
// eslint-disable-next-line @typescript-eslint/no-explicit-any
(audioTrack as any).setAudioContext(undefined);
} else {
@@ -522,9 +521,9 @@ export class Publisher {
combineLatest([audioTrack$, vadEnabled.value$])
.pipe(scope.bind())
.subscribe(([audioTrack, enabled]) => {
if (!audioTrack || !audioCtx) return;
if (!audioCtx || !rawMicTrack) return;
if (enabled) {
startVAD(audioTrack, audioCtx);
startVAD(rawMicTrack, audioCtx);
} else {
stopVAD();
}