fix(classroom): audio speed slider now actually changes playback speed with pitch correction

- Fix client: playbackRate was forced to 1 for non-native-speed providers (Speaches/Piper), making the speed slider ineffective in the main Pizzicato audio path
- Fix server: pass the speed parameter to the Speaches TTS API
- Add pitch-preserving playback via MediaElementAudioSourceNode when rate != 1, routing through the same Web Audio effects chain (radio filters, distortion, etc.)

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-05-13 01:46:08 +08:00 · 2026-02-15 01:01:02 +01:00
parent ea51e1dcc3
commit 5a25e212d7
3 changed files with 108 additions and 28 deletions
--- a/app/pages/classroom.vue
+++ b/app/pages/classroom.vue
@@ -4143,9 +4143,8 @@ async function playAudioSource(source: CachedAudio, targetRate: number, token: n
  const basePlaybackRate = supportsNativeSpeed
      ? clampRate(desiredRate / (nativeRate || 1), 0.5, 2)
      : desiredRate
-  const needsRateFallback = !supportsNativeSpeed && Math.abs(basePlaybackRate - 1) > 0.0001
-  const playbackRate = needsRateFallback ? 1 : basePlaybackRate
-  const htmlPlaybackRate = needsRateFallback ? basePlaybackRate : playbackRate
+  const playbackRate = basePlaybackRate
+  const htmlPlaybackRate = basePlaybackRate

  const playWithoutEffects = async () => {
    if (token !== activePlaybackToken) return
@@ -4179,7 +4178,7 @@ async function playAudioSource(source: CachedAudio, targetRate: number, token: n
      throw new Error('Audio engine unavailable')
    }

-    const sound = await pizzicato.createSoundFromBase64(ctx, source.base64)
+    const sound = await pizzicato.createSoundFromBase64(ctx, source.base64, mime)
    if (token !== activePlaybackToken) {
      sound.clearEffects()
      return
--- a/server/api/atc/say.post.ts
+++ b/server/api/atc/say.post.ts
@@ -89,7 +89,8 @@ async function speachesTTS(
    voice: string,
    model: string,
    response_format: AudioFmt,
-    baseUrl: string
+    baseUrl: string,
+    speed: number = 1.0
 ): Promise<Buffer> {
    const url = `${baseUrl.replace(/\/+$/, "")}/v1/audio/speech`;
    const body = {
@@ -97,7 +98,8 @@ async function speachesTTS(
        model,
        voice,
        // API erwartet "response_format": "mp3" | "flac" | "wav" | "pcm"
-        response_format
+        response_format,
+        speed
    };
    const res = await fetch(url, {
        method: "POST",
@@ -174,7 +176,7 @@ export default defineEventHandler(async (event) => {
            if (!baseUrl) {
                throw new Error("SPEACHES_BASE_URL not set");
            }
-            audioBuffer = await speachesTTS(normalized, voice, model, fmt, baseUrl);
+            audioBuffer = await speachesTTS(normalized, voice, model, fmt, baseUrl, speed);
            modelUsed = model;
            // Server returns the correct format according to response_format
            actualMime = fmtToMime(fmt);
--- a/shared/utils/pizzicatoLite.ts
+++ b/shared/utils/pizzicatoLite.ts
@@ -13,7 +13,7 @@ export type SupportedEffect =
  | TremoloEffect

 export interface PizzicatoLite {
-  createSoundFromBase64(context: AudioContext, base64: string): Promise<PizzicatoSound>
+  createSoundFromBase64(context: AudioContext, base64: string, mime?: string): Promise<PizzicatoSound>
  Effects: {
    HighPassFilter: typeof HighPassFilterEffect
    LowPassFilter: typeof LowPassFilterEffect
@@ -46,18 +46,30 @@ const createDistortionCurve = (amount: number, context: AudioContext) => {
  return curve
 }

+const setPreservesPitch = (media: HTMLMediaElement, preserve: boolean) => {
+  try { (media as any).preservesPitch = preserve } catch { /* unsupported */ }
+  try { (media as any).mozPreservesPitch = preserve } catch { /* unsupported */ }
+  try { (media as any).webkitPreservesPitch = preserve } catch { /* unsupported */ }
+}
+
 class PizzicatoSound {
  private context: AudioContext
  private buffer: AudioBuffer
+  private base64: string
+  private mime: string
  private outputNode: GainNode
  private sourceNode: AudioBufferSourceNode | null = null
+  private mediaElement: HTMLAudioElement | null = null
+  private mediaSourceNode: MediaElementAudioSourceNode | null = null
  private effects: EffectNode[] = []
  private isPlaying = false
-  private playbackRate = 1
+  private _playbackRate = 1

-  constructor(context: AudioContext, buffer: AudioBuffer) {
+  constructor(context: AudioContext, buffer: AudioBuffer, base64: string, mime: string) {
    this.context = context
    this.buffer = buffer
+    this.base64 = base64
+    this.mime = mime
    this.outputNode = context.createGain()
    this.outputNode.gain.value = 1
    this.outputNode.connect(this.context.destination)
@@ -81,25 +93,33 @@ class PizzicatoSound {
  }

  setPlaybackRate(value: number) {
-    const clamped = clamp(value, 0.25, 4)
-    this.playbackRate = clamped
-    if (this.sourceNode) {
-      try {
-        this.sourceNode.playbackRate.value = clamped
-      } catch {
-        // ignore rate assignment errors
-      }
+    this._playbackRate = clamp(value, 0.25, 4)
+    if (this.mediaElement) {
+      this.mediaElement.playbackRate = this._playbackRate
    }
  }

+  private get needsRateAdjustment(): boolean {
+    return Math.abs(this._playbackRate - 1) > 0.001
+  }
+
  async play(): Promise<void> {
    if (this.isPlaying) {
      this.stop()
    }

+    // Use MediaElement path when rate != 1 so preservesPitch works.
+    // BufferSource path is used at normal speed (no pitch issue).
+    if (this.needsRateAdjustment) {
+      return this.playViaMediaElement()
+    }
+    return this.playViaBuffer()
+  }
+
+  private async playViaBuffer(): Promise<void> {
    const source = this.context.createBufferSource()
    source.buffer = this.buffer
-    source.playbackRate.value = this.playbackRate
+    source.playbackRate.value = 1

    const connectedNodes: AudioNode[] = []

@@ -141,15 +161,74 @@ class PizzicatoSound {
    })
  }

-  stop() {
-    if (!this.isPlaying || !this.sourceNode) {
-      return
+  private async playViaMediaElement(): Promise<void> {
+    const audio = new Audio(`data:${this.mime};base64,${this.base64}`)
+    audio.playbackRate = this._playbackRate
+    setPreservesPitch(audio, true)
+
+    // MediaElementSourceNode routes audio through Web Audio effects chain
+    const mediaSource = this.context.createMediaElementSource(audio)
+
+    const connectedNodes: AudioNode[] = []
+
+    let currentNode: AudioNode = mediaSource
+    for (const effect of this.effects) {
+      connectedNodes.push(currentNode)
+      currentNode.connect(effect.inputNode)
+      currentNode = effect.outputNode
+      effect.onActivate?.()
    }

-    try {
-      this.sourceNode.stop()
-    } catch {
-      // ignore stop errors
+    connectedNodes.push(currentNode)
+    currentNode.connect(this.outputNode)
+
+    this.mediaElement = audio
+    this.mediaSourceNode = mediaSource
+    this.isPlaying = true
+
+    return new Promise((resolve) => {
+      const cleanup = () => {
+        this.isPlaying = false
+        connectedNodes.forEach(node => {
+          try {
+            node.disconnect()
+          } catch {
+            // ignore disconnect errors
+          }
+        })
+        this.effects.forEach(effect => effect.onDeactivate?.())
+        this.mediaElement = null
+        this.mediaSourceNode = null
+        resolve()
+      }
+
+      audio.onended = cleanup
+      audio.onerror = cleanup
+
+      audio.play().catch(() => {
+        cleanup()
+      })
+    })
+  }
+
+  stop() {
+    if (!this.isPlaying) return
+
+    if (this.sourceNode) {
+      try {
+        this.sourceNode.stop()
+      } catch {
+        // ignore stop errors
+      }
+    }
+
+    if (this.mediaElement) {
+      try {
+        this.mediaElement.pause()
+        this.mediaElement.currentTime = 0
+      } catch {
+        // ignore stop errors
+      }
    }
  }
 }
@@ -359,10 +438,10 @@ export const loadPizzicatoLite = async (): Promise<PizzicatoLite | null> => {
  if (cachedInstance) return cachedInstance

  const instance: PizzicatoLite = {
-    async createSoundFromBase64(context: AudioContext, base64: string) {
+    async createSoundFromBase64(context: AudioContext, base64: string, mime: string = 'audio/wav') {
      const arrayBuffer = decodeBase64ToArrayBuffer(base64)
      const audioBuffer = await context.decodeAudioData(arrayBuffer.slice(0))
-      return new PizzicatoSound(context, audioBuffer)
+      return new PizzicatoSound(context, audioBuffer, base64, mime)
    },
    Effects: {
      HighPassFilter: HighPassFilterEffect,