diff --git a/src/livekit/NoiseGateProcessor.worklet.ts b/src/livekit/NoiseGateProcessor.worklet.ts
index 6c1981b7..e9204568 100644
--- a/src/livekit/NoiseGateProcessor.worklet.ts
+++ b/src/livekit/NoiseGateProcessor.worklet.ts
@@ -21,6 +21,7 @@ declare function registerProcessor(
 ): void;
 
 interface NoiseGateParams {
+  noiseGateActive: boolean;
   threshold: number; // dBFS — gate opens above this, closes below it
   attackMs: number;
   holdMs: number;
@@ -53,6 +54,7 @@ function dbToLinear(db: number): number {
  */
 class NoiseGateProcessor extends AudioWorkletProcessor {
   // Noise gate state
+  private noiseGateActive = true;
   private threshold = dbToLinear(-60);
   private attackRate = 1.0 / (0.025 * sampleRate);
   private releaseRate = 1.0 / (0.15 * sampleRate);
@@ -88,13 +90,14 @@ class NoiseGateProcessor extends AudioWorkletProcessor {
       }
     };
     this.updateParams({
-      threshold: -60, attackMs: 25, holdMs: 200, releaseMs: 150,
+      noiseGateActive: true, threshold: -60, attackMs: 25, holdMs: 200, releaseMs: 150,
       transientEnabled: false, transientThresholdDb: 15, transientReleaseMs: 80,
     });
     this.port.postMessage({ type: "log", msg: "[NoiseGate worklet] constructor called, sampleRate=" + sampleRate });
   }
 
   private updateParams(p: NoiseGateParams): void {
+    this.noiseGateActive = p.noiseGateActive ?? true;
     this.threshold = dbToLinear(p.threshold);
     this.attackRate = 1.0 / ((p.attackMs / 1000) * sampleRate);
     this.releaseRate = 1.0 / ((p.releaseMs / 1000) * sampleRate);
@@ -147,20 +150,24 @@ class NoiseGateProcessor extends AudioWorkletProcessor {
       }
 
       // --- Noise gate ---
-      if (curLevel > this.threshold && !this.isOpen) {
-        this.isOpen = true;
-      }
-      if (curLevel <= this.threshold && this.isOpen) {
-        this.heldTime = 0;
-        this.isOpen = false;
-      }
-      if (this.isOpen) {
-        this.gateAttenuation = Math.min(1.0, this.gateAttenuation + this.attackRate);
-      } else {
-        this.heldTime += samplePeriod;
-        if (this.heldTime > this.holdTime) {
-          this.gateAttenuation = Math.max(0.0, this.gateAttenuation - this.releaseRate);
+      if (this.noiseGateActive) {
+        if (curLevel > this.threshold && !this.isOpen) {
+          this.isOpen = true;
         }
+        if (curLevel <= this.threshold && this.isOpen) {
+          this.heldTime = 0;
+          this.isOpen = false;
+        }
+        if (this.isOpen) {
+          this.gateAttenuation = Math.min(1.0, this.gateAttenuation + this.attackRate);
+        } else {
+          this.heldTime += samplePeriod;
+          if (this.heldTime > this.holdTime) {
+            this.gateAttenuation = Math.max(0.0, this.gateAttenuation - this.releaseRate);
+          }
+        }
+      } else {
+        this.gateAttenuation = 1.0;
       }
 
       // Ramp VAD attenuation toward target to avoid clicks on gate open/close
diff --git a/src/livekit/NoiseGateTransformer.ts b/src/livekit/NoiseGateTransformer.ts
index 33269e3b..74d34953 100644
--- a/src/livekit/NoiseGateTransformer.ts
+++ b/src/livekit/NoiseGateTransformer.ts
@@ -11,6 +11,7 @@ import { logger } from "matrix-js-sdk/lib/logger";
 const log = logger.getChild("[NoiseGateTransformer]");
 
 export interface NoiseGateParams {
+  noiseGateActive: boolean;
   threshold: number; // dBFS — gate opens above this, closes below it
   attackMs: number;
   holdMs: number;
diff --git a/src/livekit/SileroVADGate.ts b/src/livekit/SileroVADGate.ts
index 5a951d5e..bc022166 100644
--- a/src/livekit/SileroVADGate.ts
+++ b/src/livekit/SileroVADGate.ts
@@ -20,60 +20,86 @@ export interface SileroVADGateOptions {
 }
 
 /**
- * Wraps @ricky0123/vad-web's MicVAD to feed per-frame speech probability
- * decisions into the NoiseGateTransformer's VAD gate.
+ * Wraps @ricky0123/vad-web's MicVAD with a two-phase lifecycle:
  *
- * Uses onFrameProcessed (fires every ~96ms) rather than the segment-level
- * onSpeechStart/onSpeechEnd callbacks — those only fire at segment boundaries
- * so non-speech noise never triggers onSpeechEnd, keeping the gate open.
- * Per-frame probability control with hysteresis fixes this.
+ *   init(audioContext)  — loads the ONNX model and ORT WASM (expensive,
+ *                         call as early as possible for zero-latency enable)
+ *   start(stream)       — waits for init() to settle, then wires the stream
+ *                         and begins per-frame processing
+ *   stop()              — pauses processing, keeps model loaded
+ *   destroy()           — full teardown
  *
- * The gate starts OPEN (fail-safe): audio flows immediately and the model
- * closes it on the first silent frame. A failed model load therefore
- * degrades gracefully instead of permanently muting the user.
+ * Uses onFrameProcessed (fires every ~32ms with v5 model) with hysteresis
+ * to control the gate. Starts OPEN so audio flows immediately; the model
+ * closes it on the first silent frame.
  */
 export class SileroVADGate {
   /** Called each time the gate transitions to open (speech detected). */
   public onOpen: () => void = () => {};
   /** Called each time the gate transitions to closed (silence detected). */
   public onClose: () => void = () => {};
 
   private vad: MicVAD | null = null;
-  private readonly stream: MediaStream;
-  private readonly audioContext: AudioContext;
+  /** The in-flight or settled model load from the latest init(); start() awaits it. */
+  private initPromise: Promise<void> | null = null;
+  /** Incremented by start(), stop() and destroy() so a start() waiting on init() can detect it was superseded. */
+  private startGeneration = 0;
+  private activeStream: MediaStream | null = null;
   private options: SileroVADGateOptions;
   private gateOpen = true;
 
-  public constructor(stream: MediaStream, audioContext: AudioContext, options: SileroVADGateOptions) {
-    this.stream = stream;
-    this.audioContext = audioContext;
+  public constructor(options: SileroVADGateOptions) {
     this.options = options;
   }
 
-  public async start(): Promise<void> {
-    const stream = this.stream;
-    const audioContext = this.audioContext;
-
-    log.info("initialising MicVAD, baseAssetPath:", VAD_BASE_PATH);
-
+  /**
+   * Phase 1 — load the model. Call this as early as possible (e.g. when the
+   * AudioContext is first created) so start() is near-instant later.
+   * start() may be called before this resolves; it waits for the load.
+   */
+  public async init(audioContext: AudioContext): Promise<void> {
+    // Remember the in-flight load so start() can await it instead of racing it.
+    this.initPromise = this.loadModel(audioContext);
+    await this.initPromise;
+  }
+
+  /** Creates the MicVAD instance, first tearing down one left by an earlier init(). */
+  private async loadModel(audioContext: AudioContext): Promise<void> {
+    const previous = this.vad;
+    this.vad = null;
+    if (previous) {
+      try {
+        await previous.destroy();
+      } catch (e: unknown) {
+        log.warn("failed to destroy previous MicVAD instance", e);
+      }
+    }
+
     // Avoid requiring SharedArrayBuffer (COOP/COEP headers) by running
     // single-threaded. Performance is sufficient for 16kHz speech frames.
     ort.env.wasm.numThreads = 1;
 
+    log.info("pre-warming MicVAD model");
+
     this.vad = await MicVAD.new({
-      // v5 model uses 512-sample frames (32ms) vs legacy's fixed 1536 (96ms),
-      // giving 3× faster gate response at the cost of a slightly larger model file.
       ...getDefaultRealTimeVADOptions("v5"),
       audioContext,
       baseAssetPath: VAD_BASE_PATH,
       onnxWASMBasePath: VAD_BASE_PATH,
       startOnLoad: false,
-      // Provide the existing stream instead of calling getUserMedia
+      // Stream is provided via activeStream at start() time
+      // eslint-disable-next-line @typescript-eslint/require-await
+      getStream: async (): Promise<MediaStream> => {
+        if (!this.activeStream) throw new Error("[VAD] stream not set — call start() first");
+        return this.activeStream;
+      },
       // eslint-disable-next-line @typescript-eslint/require-await
-      getStream: async (): Promise<MediaStream> => stream,
       pauseStream: async (): Promise<void> => {},
       // eslint-disable-next-line @typescript-eslint/require-await
-      resumeStream: async (): Promise<MediaStream> => stream,
+      resumeStream: async (): Promise<MediaStream> => {
+        if (!this.activeStream) throw new Error("[VAD] stream not set");
+        return this.activeStream;
+      },
       onFrameProcessed: (probabilities: { isSpeech: number; notSpeech: number }): void => {
         const p = probabilities.isSpeech;
         if (!this.gateOpen && p >= this.options.positiveThreshold) {
@@ -92,10 +118,35 @@ export class SileroVADGate {
       onSpeechRealStart: (): void => {},
     });
 
+    log.info("MicVAD model loaded");
+  }
+
+  /**
+   * Phase 2 — wire the raw mic stream and begin classifying frames.
+   * init() must have been called first, but need not have finished: callers
+   * fire init() without awaiting it, so this waits for the model load to
+   * settle and rethrows its failure.
+   */
+  public async start(stream: MediaStream): Promise<void> {
+    if (!this.initPromise) throw new Error("[VAD] call init() before start()");
+    const generation = ++this.startGeneration;
+    await this.initPromise;
+    // A stop(), destroy() or newer start() arrived while the model was loading.
+    if (generation !== this.startGeneration) return;
+    if (!this.vad) throw new Error("[VAD] call init() before start()");
+    this.activeStream = stream;
+    this.gateOpen = true; // start open — first silent frame will close it
     await this.vad.start();
     log.info("MicVAD started");
   }
 
+  /** Pause frame processing without destroying the model. */
+  public async stop(): Promise<void> {
+    this.startGeneration++; // cancel any start() still waiting on init()
+    if (this.vad) await this.vad.pause();
+    this.activeStream = null;
+  }
+
   public updateOptions(options: SileroVADGateOptions): void {
     this.options = options;
   }
@@ -105,5 +156,8 @@ export class SileroVADGate {
       await this.vad.destroy();
       this.vad = null;
     }
+    this.activeStream = null;
+    this.initPromise = null;
+    this.startGeneration++; // cancel any start() still waiting on init()
   }
 }
diff --git a/src/settings/SettingsModal.tsx b/src/settings/SettingsModal.tsx
index e01139a3..e9d9daba 100644
--- a/src/settings/SettingsModal.tsx
+++ b/src/settings/SettingsModal.tsx
@@ -336,12 +336,11 @@ export const SettingsModal: FC<Props> = ({
               id="vadEnabled"
               type="checkbox"
               label="Enable voice activity detection"
-              description="Uses the Silero VAD model to mute audio when no speech is detected. Requires the noise gate to be enabled."
+              description="Uses the Silero VAD model to mute audio when no speech is detected."
               checked={vadActive}
               onChange={(e: ChangeEvent<HTMLInputElement>): void =>
                 setVadActive(e.target.checked)
               }
-              disabled={!noiseGateEnabled}
             />
           </FieldRow>
           {vadActive && (
diff --git a/src/state/CallViewModel/localMember/Publisher.ts b/src/state/CallViewModel/localMember/Publisher.ts
index fd82b767..d3c3f4cc 100644
--- a/src/state/CallViewModel/localMember/Publisher.ts
+++ b/src/state/CallViewModel/localMember/Publisher.ts
@@ -439,10 +439,15 @@ export class Publisher {
 
     let transformer: NoiseGateTransformer | null = null;
     let audioCtx: AudioContext | null = null;
-    let vadGate: SileroVADGate | null = null;
+    // Single VAD gate instance — persists across start/stop to keep model warm
+    let vadGate: SileroVADGate | null = new SileroVADGate({
+      positiveThreshold: vadPositiveThreshold.getValue(),
+      negativeThreshold: vadNegativeThreshold.getValue(),
+    });
     let rawMicTrack: MediaStreamTrack | null = null;
 
     const currentParams = (): NoiseGateParams => ({
+      noiseGateActive: noiseGateEnabled.getValue(),
       threshold: noiseGateThreshold.getValue(),
       attackMs: noiseGateAttack.getValue(),
       holdMs: noiseGateHold.getValue(),
@@ -454,33 +459,29 @@ export class Publisher {
 
     const stopVAD = (): void => {
       if (vadGate) {
-        void vadGate.destroy();
-        vadGate = null;
+        vadGate.stop().catch((e: unknown) => this.logger.error("[VAD] failed to stop", e));
       }
       // Always reopen gate when VAD stops so audio flows without VAD
       transformer?.setVADOpen(true);
     };
 
-    const startVAD = (rawTrack: MediaStreamTrack, ctx: AudioContext): void => {
-      stopVAD();
+    const startVAD = (rawTrack: MediaStreamTrack): void => {
+      if (!vadGate) return;
       const stream = new MediaStream([rawTrack]);
-      vadGate = new SileroVADGate(stream, ctx, {
-        positiveThreshold: vadPositiveThreshold.getValue(),
-        negativeThreshold: vadNegativeThreshold.getValue(),
-      });
       vadGate.onOpen  = (): void => transformer?.setVADOpen(true);
       vadGate.onClose = (): void => transformer?.setVADOpen(false);
-      vadGate.start().catch((e: unknown) => {
+      vadGate.start(stream).catch((e: unknown) => {
         this.logger.error("[VAD] failed to start", e);
       });
     };
 
-    // Attach / detach processor when enabled state or the track changes.
-    combineLatest([audioTrack$, noiseGateEnabled.value$])
+    // Attach / detach processor when noise gate or VAD enabled state or the track changes.
+    combineLatest([audioTrack$, noiseGateEnabled.value$, vadEnabled.value$])
       .pipe(scope.bind())
-      .subscribe(([audioTrack, enabled]) => {
+      .subscribe(([audioTrack, ngEnabled, vadActive]) => {
         if (!audioTrack) return;
-        if (enabled && !audioTrack.getProcessor()) {
+        const shouldAttach = ngEnabled || vadActive;
+        if (shouldAttach && !audioTrack.getProcessor()) {
           const params = currentParams();
           this.logger.info("[NoiseGate] attaching processor, params:", params);
           // Capture the raw mic track BEFORE setProcessor replaces it
@@ -491,6 +492,12 @@ export class Publisher {
           this.logger.info("[NoiseGate] AudioContext state before resume:", audioCtx.state);
           // eslint-disable-next-line @typescript-eslint/no-explicit-any
           (audioTrack as any).setAudioContext(audioCtx);
+          // Pre-warm VAD model as soon as AudioContext is created
+          if (vadGate && audioCtx) {
+            vadGate.init(audioCtx).catch((e: unknown) => {
+              this.logger.error("[VAD] failed to pre-warm model", e);
+            });
+          }
           audioCtx.resume().then(async () => {
             this.logger.info("[NoiseGate] AudioContext state after resume:", audioCtx?.state);
             return audioTrack
@@ -498,11 +505,11 @@ export class Publisher {
               .setProcessor(transformer as any);
           }).then(() => {
             this.logger.info("[NoiseGate] setProcessor resolved");
-            if (vadEnabled.getValue() && audioCtx && rawMicTrack) startVAD(rawMicTrack, audioCtx);
+            if (vadEnabled.getValue() && rawMicTrack) startVAD(rawMicTrack);
           }).catch((e: unknown) => {
             this.logger.error("[NoiseGate] setProcessor failed", e);
           });
-        } else if (!enabled && audioTrack.getProcessor()) {
+        } else if (!shouldAttach && audioTrack.getProcessor()) {
           this.logger.info("[NoiseGate] removing processor");
           stopVAD();
           void audioTrack.stopProcessor();
@@ -512,18 +519,21 @@ export class Publisher {
           rawMicTrack = null;
           // eslint-disable-next-line @typescript-eslint/no-explicit-any
           (audioTrack as any).setAudioContext(undefined);
+        } else if (shouldAttach && audioTrack.getProcessor()) {
+          // Processor already attached — push updated params (e.g. noiseGateActive toggled)
+          transformer?.updateParams(currentParams());
         } else {
-          this.logger.info("[NoiseGate] tick — enabled:", enabled, "hasProcessor:", !!audioTrack.getProcessor());
+          this.logger.info("[NoiseGate] tick — ngEnabled:", ngEnabled, "vadActive:", vadActive, "hasProcessor:", !!audioTrack.getProcessor());
         }
       });
 
     // Start/stop VAD when its toggle changes.
     combineLatest([audioTrack$, vadEnabled.value$])
       .pipe(scope.bind())
-      .subscribe(([audioTrack, enabled]) => {
-        if (!audioCtx || !rawMicTrack) return;
+      .subscribe(([, enabled]) => {
+        if (!rawMicTrack) return;
         if (enabled) {
-          startVAD(rawMicTrack, audioCtx);
+          startVAD(rawMicTrack);
         } else {
           stopVAD();
         }
@@ -538,6 +548,7 @@ export class Publisher {
 
     // Push param changes to the live worklet without recreating the processor.
     combineLatest([
+      noiseGateEnabled.value$,
       noiseGateThreshold.value$,
       noiseGateAttack.value$,
       noiseGateHold.value$,
@@ -547,13 +558,21 @@ export class Publisher {
       transientRelease.value$,
     ])
       .pipe(scope.bind())
-      .subscribe(([threshold, attackMs, holdMs, releaseMs,
+      .subscribe(([noiseGateActive, threshold, attackMs, holdMs, releaseMs,
                    transientEnabled, transientThresholdDb, transientReleaseMs]) => {
         transformer?.updateParams({
-          threshold, attackMs, holdMs, releaseMs,
+          noiseGateActive, threshold, attackMs, holdMs, releaseMs,
           transientEnabled, transientThresholdDb, transientReleaseMs,
         });
       });
+
+    // Destroy VAD gate when scope ends (processor fully torn down)
+    scope.onEnd(() => {
+      if (vadGate) {
+        void vadGate.destroy();
+        vadGate = null;
+      }
+    });
   }
 
   private observeTrackProcessors(