From 5a25e212d7d045aacdf36d5d4e39f29abac0bd55 Mon Sep 17 00:00:00 2001 From: itsrubberduck Date: Sun, 15 Feb 2026 01:01:02 +0100 Subject: [PATCH] fix(classroom): audio speed slider now actually changes playback speed with pitch correction - Fix client: playbackRate was set to 1 for non-native-speed providers (Speaches/Piper), making the speed slider ineffective in the main Pizzicato audio path - Fix server: pass speed parameter to Speaches TTS API - Add pitch-preserving playback via MediaElementSourceNode when rate != 1, routing through the same Web Audio effects chain (radio filters, distortion, etc.) Co-Authored-By: Claude Opus 4.6 --- app/pages/classroom.vue | 7 +- server/api/atc/say.post.ts | 8 ++- shared/utils/pizzicatoLite.ts | 121 ++++++++++++++++++++++++++++------ 3 files changed, 108 insertions(+), 28 deletions(-) diff --git a/app/pages/classroom.vue b/app/pages/classroom.vue index 68910d9..5284b96 100644 --- a/app/pages/classroom.vue +++ b/app/pages/classroom.vue @@ -4143,9 +4143,8 @@ async function playAudioSource(source: CachedAudio, targetRate: number, token: n const basePlaybackRate = supportsNativeSpeed ? clampRate(desiredRate / (nativeRate || 1), 0.5, 2) : desiredRate - const needsRateFallback = !supportsNativeSpeed && Math.abs(basePlaybackRate - 1) > 0.0001 - const playbackRate = needsRateFallback ? 1 : basePlaybackRate - const htmlPlaybackRate = needsRateFallback ? basePlaybackRate : playbackRate + const playbackRate = basePlaybackRate + const htmlPlaybackRate = basePlaybackRate const playWithoutEffects = async () => { if (token !== activePlaybackToken) return @@ -4179,7 +4178,7 @@ async function playAudioSource(source: CachedAudio, targetRate: number, token: n throw new Error('Audio engine unavailable') } - const sound = await pizzicato.createSoundFromBase64(ctx, source.base64) + const sound = await pizzicato.createSoundFromBase64(ctx, source.base64, mime) if (token !== activePlaybackToken) { sound.clearEffects() return diff --git a/server/api/atc/say.post.ts b/server/api/atc/say.post.ts index 057bf94..50b2c77 100644 --- a/server/api/atc/say.post.ts +++ b/server/api/atc/say.post.ts @@ -89,7 +89,8 @@ async function speachesTTS( voice: string, model: string, response_format: AudioFmt, - baseUrl: string + baseUrl: string, + speed: number = 1.0 ): Promise { const url = `${baseUrl.replace(/\/+$/, "")}/v1/audio/speech`; const body = { @@ -97,7 +98,8 @@ async function speachesTTS( model, voice, // API erwartet "response_format": "mp3" | "flac" | "wav" | "pcm" - response_format + response_format, + speed }; const res = await fetch(url, { method: "POST", @@ -174,7 +176,7 @@ export default defineEventHandler(async (event) => { if (!baseUrl) { throw new Error("SPEACHES_BASE_URL not set"); } - audioBuffer = await speachesTTS(normalized, voice, model, fmt, baseUrl); + audioBuffer = await speachesTTS(normalized, voice, model, fmt, baseUrl, speed); modelUsed = model; // Server returns the correct format according to response_format actualMime = fmtToMime(fmt); diff --git a/shared/utils/pizzicatoLite.ts b/shared/utils/pizzicatoLite.ts index 3faa7ff..709a44c 100644 --- a/shared/utils/pizzicatoLite.ts +++ b/shared/utils/pizzicatoLite.ts @@ -13,7 +13,7 @@ export type SupportedEffect = | TremoloEffect export interface PizzicatoLite { - createSoundFromBase64(context: AudioContext, base64: string): Promise + createSoundFromBase64(context: AudioContext, base64: string, mime?: string): Promise Effects: { HighPassFilter: typeof HighPassFilterEffect LowPassFilter: typeof LowPassFilterEffect @@ -46,18 +46,30 @@ const createDistortionCurve = (amount: number, context: AudioContext) => { return curve } +const setPreservesPitch = (media: HTMLMediaElement, preserve: boolean) => { + try { (media as any).preservesPitch = preserve } catch { /* unsupported */ } + try { (media as any).mozPreservesPitch = preserve } catch { /* unsupported */ } + try { (media as any).webkitPreservesPitch = preserve } catch { /* unsupported */ } +} + class PizzicatoSound { private context: AudioContext private buffer: AudioBuffer + private base64: string + private mime: string private outputNode: GainNode private sourceNode: AudioBufferSourceNode | null = null + private mediaElement: HTMLAudioElement | null = null + private mediaSourceNode: MediaElementAudioSourceNode | null = null private effects: EffectNode[] = [] private isPlaying = false - private playbackRate = 1 + private _playbackRate = 1 - constructor(context: AudioContext, buffer: AudioBuffer) { + constructor(context: AudioContext, buffer: AudioBuffer, base64: string, mime: string) { this.context = context this.buffer = buffer + this.base64 = base64 + this.mime = mime this.outputNode = context.createGain() this.outputNode.gain.value = 1 this.outputNode.connect(this.context.destination) @@ -81,25 +93,33 @@ class PizzicatoSound { } setPlaybackRate(value: number) { - const clamped = clamp(value, 0.25, 4) - this.playbackRate = clamped - if (this.sourceNode) { - try { - this.sourceNode.playbackRate.value = clamped - } catch { - // ignore rate assignment errors - } + this._playbackRate = clamp(value, 0.25, 4) + if (this.mediaElement) { + this.mediaElement.playbackRate = this._playbackRate } } + private get needsRateAdjustment(): boolean { + return Math.abs(this._playbackRate - 1) > 0.001 + } + async play(): Promise { if (this.isPlaying) { this.stop() } + // Use MediaElement path when rate != 1 so preservesPitch works. + // BufferSource path is used at normal speed (no pitch issue). + if (this.needsRateAdjustment) { + return this.playViaMediaElement() + } + return this.playViaBuffer() + } + + private async playViaBuffer(): Promise { const source = this.context.createBufferSource() source.buffer = this.buffer - source.playbackRate.value = this.playbackRate + source.playbackRate.value = 1 const connectedNodes: AudioNode[] = [] @@ -141,15 +161,74 @@ class PizzicatoSound { }) } - stop() { - if (!this.isPlaying || !this.sourceNode) { - return + private async playViaMediaElement(): Promise { + const audio = new Audio(`data:${this.mime};base64,${this.base64}`) + audio.playbackRate = this._playbackRate + setPreservesPitch(audio, true) + + // MediaElementSourceNode routes audio through Web Audio effects chain + const mediaSource = this.context.createMediaElementSource(audio) + + const connectedNodes: AudioNode[] = [] + + let currentNode: AudioNode = mediaSource + for (const effect of this.effects) { + connectedNodes.push(currentNode) + currentNode.connect(effect.inputNode) + currentNode = effect.outputNode + effect.onActivate?.() } - try { - this.sourceNode.stop() - } catch { - // ignore stop errors + connectedNodes.push(currentNode) + currentNode.connect(this.outputNode) + + this.mediaElement = audio + this.mediaSourceNode = mediaSource + this.isPlaying = true + + return new Promise((resolve) => { + const cleanup = () => { + this.isPlaying = false + connectedNodes.forEach(node => { + try { + node.disconnect() + } catch { + // ignore disconnect errors + } + }) + this.effects.forEach(effect => effect.onDeactivate?.()) + this.mediaElement = null + this.mediaSourceNode = null + resolve() + } + + audio.onended = cleanup + audio.onerror = cleanup + + audio.play().catch(() => { + cleanup() + }) + }) + } + + stop() { + if (!this.isPlaying) return + + if (this.sourceNode) { + try { + this.sourceNode.stop() + } catch { + // ignore stop errors + } + } + + if (this.mediaElement) { + try { + this.mediaElement.pause() + this.mediaElement.currentTime = 0 + } catch { + // ignore stop errors + } } } } @@ -359,10 +438,10 @@ export const loadPizzicatoLite = async (): Promise => { if (cachedInstance) return cachedInstance const instance: PizzicatoLite = { - async createSoundFromBase64(context: AudioContext, base64: string) { + async createSoundFromBase64(context: AudioContext, base64: string, mime: string = 'audio/wav') { const arrayBuffer = decodeBase64ToArrayBuffer(base64) const audioBuffer = await context.decodeAudioData(arrayBuffer.slice(0)) - return new PizzicatoSound(context, audioBuffer) + return new PizzicatoSound(context, audioBuffer, base64, mime) }, Effects: { HighPassFilter: HighPassFilterEffect,