Files
element-call-custom/src/state/CallViewModel/localMember/Publisher.ts
mk 859db651e0 feat: add VAD threshold controls and smooth gate ramp
Replace the hard 0/1 VAD gate with a 20ms ramp in the worklet to prevent
clicks on open/close transitions. Expose positive and negative speech
probability thresholds as user-adjustable settings (defaults 0.5/0.35).
Sliders with restore-defaults button added to the VAD section of the
audio settings tab.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-03-23 23:57:35 -03:00

578 lines
21 KiB
TypeScript

/*
Copyright 2025 Element Creations Ltd.
Copyright 2025 New Vector Ltd.
SPDX-License-Identifier: AGPL-3.0-only OR LicenseRef-Element-Commercial
Please see LICENSE in the repository root for full details.
*/
import {
ConnectionState as LivekitConnectionState,
LocalAudioTrack,
type LocalTrackPublication,
LocalVideoTrack,
ParticipantEvent,
type Room as LivekitRoom,
Track,
} from "livekit-client";
import {
combineLatest,
map,
NEVER,
type Observable,
type Subscription,
switchMap,
} from "rxjs";
import { type Logger } from "matrix-js-sdk/lib/logger";
import type { Behavior } from "../../Behavior.ts";
import type { MediaDevices, SelectedDevice } from "../../MediaDevices.ts";
import type { MuteStates } from "../../MuteStates.ts";
import {
type ProcessorState,
trackProcessorSync,
} from "../../../livekit/TrackProcessorContext.tsx";
import { getUrlParams } from "../../../UrlParams.ts";
import {
noiseGateEnabled,
noiseGateThreshold,
noiseGateAttack,
noiseGateHold,
noiseGateRelease,
transientSuppressorEnabled,
transientThreshold,
transientRelease,
vadEnabled,
vadPositiveThreshold,
vadNegativeThreshold,
} from "../../../settings/settings.ts";
import {
type NoiseGateParams,
NoiseGateTransformer,
} from "../../../livekit/NoiseGateTransformer.ts";
import { SileroVADGate } from "../../../livekit/SileroVADGate.ts";
import { observeTrackReference$ } from "../../observeTrackReference";
import { type Connection } from "../remoteMembers/Connection.ts";
import { ObservableScope } from "../../ObservableScope.ts";
/**
* A wrapper for a Connection object.
* This wrapper will manage the connection used to publish to the LiveKit room.
* The Publisher is also responsible for creating the media tracks.
*/
export class Publisher {
/**
* By default, livekit will start publishing tracks as soon as they are created.
* In the matrix RTC world, we want to control when tracks are published based
* on whether the user is part of the RTC session or not.
*/
public shouldPublish = false;
private readonly scope = new ObservableScope();
/**
* Creates a new Publisher.
* @param connection - The connection to use for publishing.
* @param devices - The media devices to use for audio and video input.
* @param muteStates - The mute states for audio and video.
* @param trackerProcessorState$ - The processor state for the video track processor (e.g. background blur).
* @param logger - The logger to use for logging :D.
*/
public constructor(
private connection: Pick<Connection, "livekitRoom" | "state$">, //setE2EEEnabled,
devices: MediaDevices,
private readonly muteStates: MuteStates,
trackerProcessorState$: Behavior<ProcessorState>,
private logger: Logger,
) {
const { controlledAudioDevices } = getUrlParams();
const room = connection.livekitRoom;
room.setE2EEEnabled(room.options.e2ee !== undefined)?.catch((e: Error) => {
this.logger.error("Failed to set E2EE enabled on room", e);
});
// Setup track processor syncing (blur)
this.observeTrackProcessors(this.scope, room, trackerProcessorState$);
// Setup noise gate on the local microphone track
this.applyNoiseGate(this.scope, room);
// Observe media device changes and update LiveKit active devices accordingly
this.observeMediaDevices(this.scope, devices, controlledAudioDevices);
this.workaroundRestartAudioInputTrackChrome(devices, this.scope);
this.connection.livekitRoom.localParticipant.on(
ParticipantEvent.LocalTrackPublished,
this.onLocalTrackPublished.bind(this),
);
}
public async destroy(): Promise<void> {
this.scope.end();
this.logger.info("Scope ended -> unset handler");
this.muteStates.audio.unsetHandler();
this.muteStates.video.unsetHandler();
this.logger.info(`Start to stop tracks`);
try {
await this.stopTracks();
this.logger.info(`Done to stop tracks`);
} catch (e) {
this.logger.error(`Failed to stop tracks: ${e}`);
}
}
// LiveKit will publish the tracks as soon as they are created
// but we want to control when tracks are published.
// We cannot just mute the tracks, even if this will effectively stop the publishing,
// it would also prevent the user from seeing their own video/audio preview.
// So for that we use pauseUpStream(): Stops sending media to the server by replacing
// the sender track with null, but keeps the local MediaStreamTrack active.
// The user can still see/hear themselves locally, but remote participants see nothing.
private onLocalTrackPublished(
localTrackPublication: LocalTrackPublication,
): void {
this.logger.info("Local track published", localTrackPublication);
const lkRoom = this.connection.livekitRoom;
if (!this.shouldPublish) {
this.pauseUpstreams(lkRoom, [localTrackPublication.source]).catch((e) => {
this.logger.error(`Failed to pause upstreams`, e);
});
}
// also check the mute state and apply it
if (localTrackPublication.source === Track.Source.Microphone) {
const enabled = this.muteStates.audio.enabled$.value;
lkRoom.localParticipant.setMicrophoneEnabled(enabled).catch((e) => {
this.logger.error(
`Failed to enable microphone track, enabled:${enabled}`,
e,
);
});
} else if (localTrackPublication.source === Track.Source.Camera) {
const enabled = this.muteStates.video.enabled$.value;
lkRoom.localParticipant.setCameraEnabled(enabled).catch((e) => {
this.logger.error(
`Failed to enable camera track, enabled:${enabled}`,
e,
);
});
}
}
/**
* Create and setup local audio and video tracks based on the current mute states.
* It creates the tracks only if audio and/or video is enabled, to avoid unnecessary
* permission prompts.
*
* It also observes mute state changes to update LiveKit microphone/camera states accordingly.
* If a track is not created initially because disabled, it will be created when unmuting.
*
* This call is not blocking anymore, instead callers can listen to the
* `RoomEvent.MediaDevicesError` event in the LiveKit room to be notified of any errors.
*
*/
public async createAndSetupTracks(): Promise<void> {
this.logger.debug("createAndSetupTracks called");
const lkRoom = this.connection.livekitRoom;
// Observe mute state changes and update LiveKit microphone/camera states accordingly
this.observeMuteStates();
// Check if audio and/or video is enabled. We only create tracks if enabled,
// because it could prompt for permission, and we don't want to do that unnecessarily.
const audio = this.muteStates.audio.enabled$.value;
const video = this.muteStates.video.enabled$.value;
// We don't await the creation, because livekit could block until the tracks
// are fully published, and not only that they are created.
// We don't have control on that, localParticipant creates and publishes the tracks
// asap.
// We are using the `ParticipantEvent.LocalTrackPublished` to be notified
// when tracks are actually published, and at that point
// we can pause upstream if needed (depending on if startPublishing has been called).
if (audio && video) {
// Enable both at once in order to have a single permission prompt!
void lkRoom.localParticipant.enableCameraAndMicrophone();
} else if (audio) {
void lkRoom.localParticipant.setMicrophoneEnabled(true);
} else if (video) {
void lkRoom.localParticipant.setCameraEnabled(true);
}
return Promise.resolve();
}
private async pauseUpstreams(
lkRoom: LivekitRoom,
sources: Track.Source[],
): Promise<void> {
for (const source of sources) {
const track = lkRoom.localParticipant.getTrackPublication(source)?.track;
if (track) {
await track.pauseUpstream();
} else {
this.logger.warn(
`No track found for source ${source} to pause upstream`,
);
}
}
}
private async resumeUpstreams(
lkRoom: LivekitRoom,
sources: Track.Source[],
): Promise<void> {
for (const source of sources) {
const track = lkRoom.localParticipant.getTrackPublication(source)?.track;
if (track) {
await track.resumeUpstream();
} else {
this.logger.warn(
`No track found for source ${source} to resume upstream`,
);
}
}
}
/**
*
* Request to publish local tracks to the LiveKit room.
* This will wait for the connection to be ready before publishing.
* Livekit also have some local retry logic for publishing tracks.
* Can be called multiple times, localparticipant manages the state of published tracks (or pending publications).
*
* @returns
*/
public async startPublishing(): Promise<void> {
if (this.shouldPublish) {
this.logger.debug(`Already publishing, ignoring startPublishing call`);
return;
}
this.shouldPublish = true;
this.logger.debug("startPublishing called");
const lkRoom = this.connection.livekitRoom;
// Resume upstream for both audio and video tracks
// We need to call it explicitly because call setTrackEnabled does not always
// resume upstream. It will only if you switch the track from disabled to enabled,
// but if the track is already enabled but upstream is paused, it won't resume it.
// TODO what about screen share?
try {
await this.resumeUpstreams(lkRoom, [
Track.Source.Microphone,
Track.Source.Camera,
]);
} catch (e) {
this.logger.error(`Failed to resume upstreams`, e);
}
}
public async stopPublishing(): Promise<void> {
this.logger.debug("stopPublishing called");
this.shouldPublish = false;
// Pause upstream will stop sending media to the server, while keeping
// the local MediaStreamTrack active, so the user can still see themselves.
await this.pauseUpstreams(this.connection.livekitRoom, [
Track.Source.Microphone,
Track.Source.Camera,
Track.Source.ScreenShare,
]);
}
public async stopTracks(): Promise<void> {
const lkRoom = this.connection.livekitRoom;
for (const source of [
Track.Source.Microphone,
Track.Source.Camera,
Track.Source.ScreenShare,
]) {
const localPub = lkRoom.localParticipant.getTrackPublication(source);
if (localPub?.track) {
// stops and unpublishes the track
await lkRoom.localParticipant.unpublishTrack(localPub!.track, true);
}
}
}
/// Private methods
// Restart the audio input track whenever we detect that the active media
// device has changed to refer to a different hardware device. We do this
// for the sake of Chrome, which provides a "default" device that is meant
// to match the system's default audio input, whatever that may be.
// This is special-cased for only audio inputs because we need to dig around
// in the LocalParticipant object for the track object and there's not a nice
// way to do that generically. There is usually no OS-level default video capture
// device anyway, and audio outputs work differently.
private workaroundRestartAudioInputTrackChrome(
devices: MediaDevices,
scope: ObservableScope,
): void {
const lkRoom = this.connection.livekitRoom;
devices.audioInput.selected$
.pipe(
switchMap((device) => device?.hardwareDeviceChange$ ?? NEVER),
scope.bind(),
)
.subscribe(() => {
if (lkRoom.state != LivekitConnectionState.Connected) return;
const activeMicTrack = Array.from(
lkRoom.localParticipant.audioTrackPublications.values(),
).find((d) => d.source === Track.Source.Microphone)?.track;
if (
activeMicTrack &&
// only restart if the stream is still running: LiveKit will detect
// when a track stops & restart appropriately, so this is not our job.
// Plus, we need to avoid restarting again if the track is already in
// the process of being restarted.
activeMicTrack.mediaStreamTrack.readyState !== "ended"
) {
this.logger?.info(
"Restarting audio device track due to active media device changed (workaroundRestartAudioInputTrackChrome)",
);
// Restart the track, which will cause Livekit to do another
// getUserMedia() call with deviceId: default to get the *new* default device.
// Note that room.switchActiveDevice() won't work: Livekit will ignore it because
// the deviceId hasn't changed (was & still is default).
lkRoom.localParticipant
.getTrackPublication(Track.Source.Microphone)
?.audioTrack?.restartTrack()
.catch((e) => {
this.logger.error(`Failed to restart audio device track`, e);
});
}
});
}
// Observe changes in the selected media devices and update the LiveKit room accordingly.
private observeMediaDevices(
scope: ObservableScope,
devices: MediaDevices,
controlledAudioDevices: boolean,
): void {
const lkRoom = this.connection.livekitRoom;
const syncDevice = (
kind: MediaDeviceKind,
selected$: Observable<SelectedDevice | undefined>,
): Subscription =>
selected$.pipe(scope.bind()).subscribe((device) => {
if (lkRoom.state != LivekitConnectionState.Connected) return;
// if (this.connectionState$.value !== ConnectionState.Connected) return;
this.logger.info(
"[LivekitRoom] syncDevice room.getActiveDevice(kind) !== d.id :",
lkRoom.getActiveDevice(kind),
" !== ",
device?.id,
);
if (
device !== undefined &&
lkRoom.getActiveDevice(kind) !== device.id
) {
lkRoom
.switchActiveDevice(kind, device.id)
.catch((e: Error) =>
this.logger.error(
`Failed to sync ${kind} device with LiveKit`,
e,
),
);
}
});
syncDevice("audioinput", devices.audioInput.selected$);
if (!controlledAudioDevices)
syncDevice("audiooutput", devices.audioOutput.selected$);
syncDevice("videoinput", devices.videoInput.selected$);
}
/**
* Observe changes in the mute states and update the LiveKit room accordingly.
* @private
*/
private observeMuteStates(): void {
const lkRoom = this.connection.livekitRoom;
this.muteStates.audio.setHandler(async (enable) => {
try {
this.logger.debug(
`handler: Setting LiveKit microphone enabled: ${enable}`,
);
await lkRoom.localParticipant.setMicrophoneEnabled(enable);
// Unmute will restart the track if it was paused upstream,
// but until explicitly requested, we want to keep it paused.
if (!this.shouldPublish && enable) {
await this.pauseUpstreams(lkRoom, [Track.Source.Microphone]);
}
} catch (e) {
this.logger.error("Failed to update LiveKit audio input mute state", e);
}
return lkRoom.localParticipant.isMicrophoneEnabled;
});
this.muteStates.video.setHandler(async (enable) => {
try {
this.logger.debug(`handler: Setting LiveKit camera enabled: ${enable}`);
await lkRoom.localParticipant.setCameraEnabled(enable);
// Unmute will restart the track if it was paused upstream,
// but until explicitly requested, we want to keep it paused.
if (!this.shouldPublish && enable) {
await this.pauseUpstreams(lkRoom, [Track.Source.Camera]);
}
} catch (e) {
this.logger.error("Failed to update LiveKit video input mute state", e);
}
return lkRoom.localParticipant.isCameraEnabled;
});
}
private applyNoiseGate(scope: ObservableScope, room: LivekitRoom): void {
// Observe the local microphone track
const audioTrack$ = scope.behavior(
observeTrackReference$(
room.localParticipant,
Track.Source.Microphone,
).pipe(
map((ref) => {
const track = ref?.publication.track;
return track instanceof LocalAudioTrack ? track : null;
}),
),
null,
);
let transformer: NoiseGateTransformer | null = null;
let audioCtx: AudioContext | null = null;
let vadGate: SileroVADGate | null = null;
const currentParams = (): NoiseGateParams => ({
threshold: noiseGateThreshold.getValue(),
attackMs: noiseGateAttack.getValue(),
holdMs: noiseGateHold.getValue(),
releaseMs: noiseGateRelease.getValue(),
transientEnabled: transientSuppressorEnabled.getValue(),
transientThresholdDb: transientThreshold.getValue(),
transientReleaseMs: transientRelease.getValue(),
});
const stopVAD = (): void => {
if (vadGate) {
void vadGate.destroy();
vadGate = null;
}
// Always reopen gate when VAD stops so audio flows without VAD
transformer?.setVADOpen(true);
};
const startVAD = (track: LocalAudioTrack, ctx: AudioContext): void => {
stopVAD();
// eslint-disable-next-line @typescript-eslint/no-explicit-any
const rawTrack: MediaStreamTrack | undefined = (track as any).mediaStreamTrack;
if (!rawTrack) {
this.logger.warn("[VAD] no underlying MediaStreamTrack — skipping VAD");
return;
}
const stream = new MediaStream([rawTrack]);
vadGate = new SileroVADGate(stream, ctx, {
positiveThreshold: vadPositiveThreshold.getValue(),
negativeThreshold: vadNegativeThreshold.getValue(),
});
vadGate.onOpen = (): void => transformer?.setVADOpen(true);
vadGate.onClose = (): void => transformer?.setVADOpen(false);
vadGate.start().catch((e: unknown) => {
this.logger.error("[VAD] failed to start", e);
});
};
// Attach / detach processor when enabled state or the track changes.
combineLatest([audioTrack$, noiseGateEnabled.value$])
.pipe(scope.bind())
.subscribe(([audioTrack, enabled]) => {
if (!audioTrack) return;
if (enabled && !audioTrack.getProcessor()) {
const params = currentParams();
this.logger.info("[NoiseGate] attaching processor, params:", params);
transformer = new NoiseGateTransformer(params);
audioCtx = new AudioContext();
this.logger.info("[NoiseGate] AudioContext state before resume:", audioCtx.state);
// eslint-disable-next-line @typescript-eslint/no-explicit-any
(audioTrack as any).setAudioContext(audioCtx);
audioCtx.resume().then(async () => {
this.logger.info("[NoiseGate] AudioContext state after resume:", audioCtx?.state);
return audioTrack
// eslint-disable-next-line @typescript-eslint/no-explicit-any
.setProcessor(transformer as any);
}).then(() => {
this.logger.info("[NoiseGate] setProcessor resolved");
if (vadEnabled.getValue() && audioCtx) startVAD(audioTrack, audioCtx);
}).catch((e: unknown) => {
this.logger.error("[NoiseGate] setProcessor failed", e);
});
} else if (!enabled && audioTrack.getProcessor()) {
this.logger.info("[NoiseGate] removing processor");
stopVAD();
void audioTrack.stopProcessor();
void audioCtx?.close();
audioCtx = null;
transformer = null;
// eslint-disable-next-line @typescript-eslint/no-explicit-any
(audioTrack as any).setAudioContext(undefined);
} else {
this.logger.info("[NoiseGate] tick — enabled:", enabled, "hasProcessor:", !!audioTrack.getProcessor());
}
});
// Start/stop VAD when its toggle changes.
combineLatest([audioTrack$, vadEnabled.value$])
.pipe(scope.bind())
.subscribe(([audioTrack, enabled]) => {
if (!audioTrack || !audioCtx) return;
if (enabled) {
startVAD(audioTrack, audioCtx);
} else {
stopVAD();
}
});
// Push VAD threshold changes to the live gate without recreating it.
combineLatest([vadPositiveThreshold.value$, vadNegativeThreshold.value$])
.pipe(scope.bind())
.subscribe(([positiveThreshold, negativeThreshold]) => {
vadGate?.updateOptions({ positiveThreshold, negativeThreshold });
});
// Push param changes to the live worklet without recreating the processor.
combineLatest([
noiseGateThreshold.value$,
noiseGateAttack.value$,
noiseGateHold.value$,
noiseGateRelease.value$,
transientSuppressorEnabled.value$,
transientThreshold.value$,
transientRelease.value$,
])
.pipe(scope.bind())
.subscribe(([threshold, attackMs, holdMs, releaseMs,
transientEnabled, transientThresholdDb, transientReleaseMs]) => {
transformer?.updateParams({
threshold, attackMs, holdMs, releaseMs,
transientEnabled, transientThresholdDb, transientReleaseMs,
});
});
}
private observeTrackProcessors(
scope: ObservableScope,
room: LivekitRoom,
trackerProcessorState$: Behavior<ProcessorState>,
): void {
const track$ = scope.behavior(
observeTrackReference$(room.localParticipant, Track.Source.Camera).pipe(
map((trackRef) => {
const track = trackRef?.publication.track;
return track instanceof LocalVideoTrack ? track : null;
}),
),
null,
);
trackProcessorSync(scope, track$, trackerProcessorState$);
}
}