fix(classroom): audio speed slider now actually changes playback speed with pitch correction

- Fix client: playbackRate was set to 1 for non-native-speed providers (Speaches/Piper),
  making the speed slider ineffective in the main Pizzicato audio path
- Fix server: pass speed parameter to Speaches TTS API
- Add pitch-preserving playback via MediaElementAudioSourceNode when rate != 1,
  routing through the same Web Audio effects chain (radio filters, distortion, etc.)

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
itsrubberduck
2026-02-15 01:01:02 +01:00
parent ea51e1dcc3
commit 5a25e212d7
3 changed files with 108 additions and 28 deletions

View File

@@ -4143,9 +4143,8 @@ async function playAudioSource(source: CachedAudio, targetRate: number, token: n
const basePlaybackRate = supportsNativeSpeed
? clampRate(desiredRate / (nativeRate || 1), 0.5, 2)
: desiredRate
const needsRateFallback = !supportsNativeSpeed && Math.abs(basePlaybackRate - 1) > 0.0001
const playbackRate = needsRateFallback ? 1 : basePlaybackRate
const htmlPlaybackRate = needsRateFallback ? basePlaybackRate : playbackRate
const playbackRate = basePlaybackRate
const htmlPlaybackRate = basePlaybackRate
const playWithoutEffects = async () => {
if (token !== activePlaybackToken) return
@@ -4179,7 +4178,7 @@ async function playAudioSource(source: CachedAudio, targetRate: number, token: n
throw new Error('Audio engine unavailable')
}
const sound = await pizzicato.createSoundFromBase64(ctx, source.base64)
const sound = await pizzicato.createSoundFromBase64(ctx, source.base64, mime)
if (token !== activePlaybackToken) {
sound.clearEffects()
return

View File

@@ -89,7 +89,8 @@ async function speachesTTS(
voice: string,
model: string,
response_format: AudioFmt,
baseUrl: string
baseUrl: string,
speed: number = 1.0
): Promise<Buffer> {
const url = `${baseUrl.replace(/\/+$/, "")}/v1/audio/speech`;
const body = {
@@ -97,7 +98,8 @@ async function speachesTTS(
model,
voice,
// API expects "response_format": "mp3" | "flac" | "wav" | "pcm"
response_format
response_format,
speed
};
const res = await fetch(url, {
method: "POST",
@@ -174,7 +176,7 @@ export default defineEventHandler(async (event) => {
if (!baseUrl) {
throw new Error("SPEACHES_BASE_URL not set");
}
audioBuffer = await speachesTTS(normalized, voice, model, fmt, baseUrl);
audioBuffer = await speachesTTS(normalized, voice, model, fmt, baseUrl, speed);
modelUsed = model;
// Server returns the correct format according to response_format
actualMime = fmtToMime(fmt);

View File

@@ -13,7 +13,7 @@ export type SupportedEffect =
| TremoloEffect
export interface PizzicatoLite {
createSoundFromBase64(context: AudioContext, base64: string): Promise<PizzicatoSound>
createSoundFromBase64(context: AudioContext, base64: string, mime?: string): Promise<PizzicatoSound>
Effects: {
HighPassFilter: typeof HighPassFilterEffect
LowPassFilter: typeof LowPassFilterEffect
@@ -46,18 +46,30 @@ const createDistortionCurve = (amount: number, context: AudioContext) => {
return curve
}
/**
 * Best-effort toggle of pitch preservation on a media element.
 *
 * Writes `preserve` to the standard `preservesPitch` property and to the
 * `moz`- and `webkit`-prefixed variants. Each write is attempted on its own:
 * a write that throws is ignored so the remaining properties are still tried.
 *
 * @param media - Element whose pitch-preservation flags are set.
 * @param preserve - Value assigned to every flag.
 */
const setPreservesPitch = (media: HTMLMediaElement, preserve: boolean) => {
  // Narrow typed view of the prefixed properties instead of casting to `any`.
  const target = media as HTMLMediaElement & {
    mozPreservesPitch?: boolean
    webkitPreservesPitch?: boolean
  }
  const flags = ['preservesPitch', 'mozPreservesPitch', 'webkitPreservesPitch'] as const
  for (const flag of flags) {
    try {
      target[flag] = preserve
    } catch {
      /* unsupported */
    }
  }
}
class PizzicatoSound {
private context: AudioContext
private buffer: AudioBuffer
private base64: string
private mime: string
private outputNode: GainNode
private sourceNode: AudioBufferSourceNode | null = null
private mediaElement: HTMLAudioElement | null = null
private mediaSourceNode: MediaElementAudioSourceNode | null = null
private effects: EffectNode[] = []
private isPlaying = false
private playbackRate = 1
private _playbackRate = 1
constructor(context: AudioContext, buffer: AudioBuffer) {
constructor(context: AudioContext, buffer: AudioBuffer, base64: string, mime: string) {
this.context = context
this.buffer = buffer
this.base64 = base64
this.mime = mime
this.outputNode = context.createGain()
this.outputNode.gain.value = 1
this.outputNode.connect(this.context.destination)
@@ -81,25 +93,33 @@ class PizzicatoSound {
}
setPlaybackRate(value: number) {
const clamped = clamp(value, 0.25, 4)
this.playbackRate = clamped
if (this.sourceNode) {
try {
this.sourceNode.playbackRate.value = clamped
} catch {
// ignore rate assignment errors
}
this._playbackRate = clamp(value, 0.25, 4)
if (this.mediaElement) {
this.mediaElement.playbackRate = this._playbackRate
}
}
/** True when the stored playback rate differs from 1 by more than 0.001. */
private get needsRateAdjustment(): boolean {
  const deviation = Math.abs(this._playbackRate - 1)
  return deviation > 0.001
}
/**
 * Starts playback, first stopping any playback that is still in progress.
 *
 * @returns The promise produced by whichever playback path is chosen.
 */
async play(): Promise<void> {
  if (this.isPlaying) this.stop()

  // A rate other than 1 goes through the media-element path, which enables
  // pitch preservation; normal speed uses the buffer-source path.
  return this.needsRateAdjustment
    ? this.playViaMediaElement()
    : this.playViaBuffer()
}
private async playViaBuffer(): Promise<void> {
const source = this.context.createBufferSource()
source.buffer = this.buffer
source.playbackRate.value = this.playbackRate
source.playbackRate.value = 1
const connectedNodes: AudioNode[] = []
@@ -141,15 +161,74 @@ class PizzicatoSound {
})
}
stop() {
if (!this.isPlaying || !this.sourceNode) {
return
private async playViaMediaElement(): Promise<void> {
const audio = new Audio(`data:${this.mime};base64,${this.base64}`)
audio.playbackRate = this._playbackRate
setPreservesPitch(audio, true)
// MediaElementSourceNode routes audio through Web Audio effects chain
const mediaSource = this.context.createMediaElementSource(audio)
const connectedNodes: AudioNode[] = []
let currentNode: AudioNode = mediaSource
for (const effect of this.effects) {
connectedNodes.push(currentNode)
currentNode.connect(effect.inputNode)
currentNode = effect.outputNode
effect.onActivate?.()
}
try {
this.sourceNode.stop()
} catch {
// ignore stop errors
connectedNodes.push(currentNode)
currentNode.connect(this.outputNode)
this.mediaElement = audio
this.mediaSourceNode = mediaSource
this.isPlaying = true
return new Promise((resolve) => {
const cleanup = () => {
this.isPlaying = false
connectedNodes.forEach(node => {
try {
node.disconnect()
} catch {
// ignore disconnect errors
}
})
this.effects.forEach(effect => effect.onDeactivate?.())
this.mediaElement = null
this.mediaSourceNode = null
resolve()
}
audio.onended = cleanup
audio.onerror = cleanup
audio.play().catch(() => {
cleanup()
})
})
}
/**
 * Stops the current playback; does nothing when nothing is playing.
 *
 * - Buffer path: stops the active buffer source node, ignoring errors.
 * - Media-element path: pauses and rewinds the element, then dispatches a
 *   synthetic `ended` event so the `onended` cleanup registered by
 *   `playViaMediaElement` runs. Without it a manual stop would leave the
 *   pending `play()` promise unresolved, `isPlaying` stuck at `true`, and the
 *   effect chain still connected.
 */
stop() {
  if (!this.isPlaying) return

  if (this.sourceNode) {
    try {
      this.sourceNode.stop()
    } catch {
      // ignore stop errors
    }
  }

  // Capture the element first: the `ended` cleanup clears `this.mediaElement`.
  const media = this.mediaElement
  if (media) {
    try {
      media.pause()
      media.currentTime = 0
    } catch {
      // ignore stop errors
    }
    // pause() does not fire `ended`, so trigger the cleanup handler explicitly.
    media.dispatchEvent(new Event('ended'))
  }
}
}
@@ -359,10 +438,10 @@ export const loadPizzicatoLite = async (): Promise<PizzicatoLite | null> => {
if (cachedInstance) return cachedInstance
const instance: PizzicatoLite = {
async createSoundFromBase64(context: AudioContext, base64: string) {
async createSoundFromBase64(context: AudioContext, base64: string, mime: string = 'audio/wav') {
const arrayBuffer = decodeBase64ToArrayBuffer(base64)
const audioBuffer = await context.decodeAudioData(arrayBuffer.slice(0))
return new PizzicatoSound(context, audioBuffer)
return new PizzicatoSound(context, audioBuffer, base64, mime)
},
Effects: {
HighPassFilter: HighPassFilterEffect,