OpenSquawk/server/api/atc/say.post.ts

// server/api/atc/say.post.ts
import {createError, readBody} from "h3";
import {writeFile, mkdir} from "node:fs/promises";
import {existsSync} from "node:fs";
import {join} from "node:path";
import {randomUUID} from "node:crypto";
import {normalize, TTS_MODEL, normalizeATC} from "../../utils/normalize";
import { getServerRuntimeConfig } from "../../utils/runtimeConfig";
import {request} from "node:http";
import { TransmissionLog } from "../../models/TransmissionLog";
import { requireUserSession } from "../../utils/auth";


/**
 * Resolves the root directory for ATC output files.
 *
 * The trimmed `ATC_OUT_DIR` environment variable wins when it is set to a
 * non-blank value; otherwise the result is `<cwd>/storage/atc`.
 */
function outDir() {
    const configured = process.env.ATC_OUT_DIR?.trim();
    if (configured) {
        return configured;
    }
    return join(process.cwd(), "storage", "atc");
}

/**
 * Makes sure directory `p` exists, creating any missing parent directories.
 *
 * No existence pre-check is performed: a recursive `mkdir` already resolves
 * without error when the directory is present, and skipping the check avoids
 * a blocking filesystem call as well as a check-then-create race between
 * concurrent requests.
 *
 * @param p Directory path to create.
 */
async function ensureDir(p: string) {
    await mkdir(p, { recursive: true });
}

/** Gain/description presets keyed by level; level 5 is the clearest, level 1 the poorest. */
const RADIO_QUALITY_PRESETS = new Map<number, { gain: number; description: string }>([
    [5, { gain: 1.0, description: "crystal clear" }],
    [4, { gain: 0.9, description: "very good" }],
    [3, { gain: 0.8, description: "good" }],
    [2, { gain: 0.7, description: "poor" }],
    [1, { gain: 0.6, description: "very poor" }],
]);

/**
 * Maps a level to its gain factor and human-readable description.
 *
 * Any level that is not exactly 1, 2, 3, 4 or 5 yields the "standard" preset
 * (gain 0.8). A new object is returned on every call.
 */
function simulateRadioQuality(level: number) {
    const preset = RADIO_QUALITY_PRESETS.get(level) ?? { gain: 0.8, description: "standard" };
    // Copy so callers never share (and cannot mutate) the table entries.
    return { ...preset };
}

// ---- Format Helpers ----
/** Audio container/encoding identifiers understood by the TTS helpers in this file. */
type AudioFmt =
    | "mp3"
    | "flac"
    | "wav"
    | "pcm";

/**
 * Chooses the output format used when the caller does not request one.
 *
 * @param useSpeaches Whether the Speaches backend is in use.
 * @returns "mp3" for Speaches, "wav" otherwise.
 */
function pickDefaultFormat(useSpeaches: boolean): AudioFmt {
    // Prefer the smallest bitrate when Speaches is in use.
    if (useSpeaches) {
        return "mp3";
    }
    return "wav";
}
/** MIME type for each known audio format. */
const MIME_BY_FORMAT = new Map<string, string>([
    ["mp3", "audio/mpeg"],
    ["flac", "audio/flac"],
    ["wav", "audio/wav"],
    ["pcm", "audio/L16"], // raw PCM (fallback)
]);

/**
 * Returns the MIME type for an audio format.
 *
 * Values outside the known set (possible at runtime despite the static type)
 * map to "application/octet-stream".
 */
function fmtToMime(fmt: AudioFmt): string {
    return MIME_BY_FORMAT.get(fmt) ?? "application/octet-stream";
}
/**
 * Returns the file extension (without a leading dot) for an audio format.
 *
 * Each known format uses its own name as the extension; anything else
 * (possible at runtime despite the static type) maps to "bin".
 */
function fmtToExt(fmt: AudioFmt): string {
    const knownFormats: readonly string[] = ["mp3", "flac", "wav", "pcm"];
    return knownFormats.includes(fmt) ? fmt : "bin";
}

// ---- Piper HTTP helper ----
/**
 * Requests speech synthesis from a Piper HTTP server and returns the raw
 * response body.
 *
 * Sends `{ text, voice }` as a JSON `POST /` request. A non-2xx status is
 * reported as an error instead of handing the error body back as audio.
 *
 * @param text     Text to synthesize.
 * @param voice    Voice identifier forwarded in the JSON payload.
 * @param port     TCP port of the Piper server.
 * @param hostname Host of the Piper server; defaults to "localhost".
 * @returns The complete response body as a single Buffer.
 * @throws Error if the request fails, the response stream errors, or the
 *         server answers with a non-2xx status.
 */
async function piperTTS(
    text: string,
    voice: string,
    port: number,
    hostname: string = "localhost"
): Promise<Buffer> {
    return new Promise<Buffer>((resolve, reject) => {
        const req = request(
            {
                hostname,
                port,
                path: "/",
                method: "POST",
                headers: { "Content-Type": "application/json" }
            },
            (res) => {
                const chunks: Buffer[] = [];
                res.on("data", (chunk: Buffer) => chunks.push(chunk));
                // A connection dropped mid-response must reject, not hang.
                res.on("error", reject);
                res.on("end", () => {
                    const payload = Buffer.concat(chunks);
                    const status = res.statusCode ?? 0;
                    if (status < 200 || status >= 300) {
                        // Keep the message short; the body may be arbitrary server output.
                        const detail = payload.toString("utf8").slice(0, 200) || res.statusMessage || "";
                        reject(new Error(`Piper HTTP ${status}: ${detail}`));
                        return;
                    }
                    resolve(payload);
                });
            }
        );
        req.on("error", reject);
        req.write(JSON.stringify({ text, voice }));
        req.end();
    });
}

// ---- Speaches HTTP helper ----
// Env:
// USE_SPEACHES=true
// SPEACHES_BASE_URL="https://..."
// SPEECH_MODEL_ID="speaches-ai/piper-en_US-ryan-low"
// VOICE_ID="en_US-ryan-low"
/**
 * Synthesizes speech by POSTing to `<baseUrl>/v1/audio/speech`.
 *
 * @param input           Text to synthesize.
 * @param voice           Voice identifier sent in the payload.
 * @param model           Model identifier sent in the payload.
 * @param response_format Audio format requested from the server.
 * @param baseUrl         Server base URL; trailing slashes are ignored.
 * @returns The response body as a Buffer.
 * @throws Error with the status code and response text (or status text) when
 *         the response is not OK.
 */
async function speachesTTS(
    input: string,
    voice: string,
    model: string,
    response_format: AudioFmt,
    baseUrl: string
): Promise<Buffer> {
    // Drop trailing slashes so the joined URL has exactly one separator.
    const trimmedBase = baseUrl.replace(/\/+$/, "");
    const endpoint = `${trimmedBase}/v1/audio/speech`;

    // The API expects "response_format" to be one of "mp3" | "flac" | "wav" | "pcm".
    const payload = JSON.stringify({ input, model, voice, response_format });

    const response = await fetch(endpoint, {
        method: "POST",
        headers: { "Content-Type": "application/json" },
        body: payload
    });

    if (response.ok) {
        return Buffer.from(await response.arrayBuffer());
    }

    // Best effort: an unreadable error body falls back to the status text.
    let detail = "";
    try {
        detail = await response.text();
    } catch {
        detail = "";
    }
    throw new Error(`Speaches API ${response.status}: ${detail || response.statusText}`);
}

/** Formats a caller may request via `format` (the alias "smallest" maps to "mp3"). */
const SUPPORTED_FORMATS: readonly AudioFmt[] = ["mp3", "flac", "wav", "pcm"];

/**
 * ATC "say" handler: turns the posted text into speech and returns the audio
 * base64-encoded together with request metadata.
 *
 * Request body fields (all optional except `text`):
 * - `text`: phrase to speak; required, must be a non-blank string.
 * - `level`: clamped to the integer range 1..5 (default 4).
 * - `voice`: voice id; falls back to the configured default voice.
 * - `speed`: clamped to 0.5..2.0 (default 1.0); only passed to the fallback backend.
 * - `format`: "mp3" | "flac" | "wav" | "pcm" | "smallest"; only the Speaches
 *   backend honours it, the other backends produce WAV.
 * - `moduleId`, `lessonId`, `tag`: echoed into the metadata and the log.
 *
 * Backend selection: Speaches when enabled, else local Piper when enabled,
 * else the OpenAI-style client imported as `normalize`.
 *
 * @throws 400 when `text` is missing/blank, normalizes to nothing, or
 *         `format` is not a supported value.
 * @throws 500 when speech generation fails.
 */
export default defineEventHandler(async (event) => {
    // Authenticate first so anonymous callers are rejected before any payload work.
    const user = await requireUserSession(event);

    const runtimeConfig = getServerRuntimeConfig();
    const body = await readBody<{
        text?: string;
        level?: number;
        voice?: string;
        speed?: number;
        moduleId?: string;
        lessonId?: string;
        tag?: string;
        format?: AudioFmt | "smallest";
    }>(event);

    // The body is untrusted JSON: a non-string `text` is treated as missing
    // instead of crashing on `.trim()`.
    const raw = typeof body?.text === "string" ? body.text.trim() : "";
    if (!raw) throw createError({ statusCode: 400, statusMessage: "text required" });

    // Values that coerce to NaN (e.g. "abc") fall back to the default rather
    // than propagating NaN through the clamp.
    const levelInput = Number(body?.level ?? 4);
    const level = Number.isNaN(levelInput) ? 4 : Math.max(1, Math.min(5, Math.floor(levelInput)));

    // A blank or non-string voice falls back to the configured default.
    const requestedVoice = typeof body?.voice === "string" ? body.voice.trim() : "";
    const voice = requestedVoice || runtimeConfig.voiceId.trim();

    const speedInput = Number(body?.speed || 1.0);
    const speed = Number.isNaN(speedInput) ? 1.0 : Math.max(0.5, Math.min(2.0, speedInput));

    const normalized = normalizeATC(raw);
    if (!normalized) throw createError({ statusCode: 400, statusMessage: "normalized text empty" });

    // Routing
    const useSpeaches = runtimeConfig.useSpeaches;
    const usePiper = !useSpeaches && runtimeConfig.usePiper;

    // Format: reject unknown values up front instead of forwarding arbitrary
    // strings to the upstream API.
    const formatInput = body?.format === "smallest" ? "mp3" : body?.format;
    if (formatInput && !SUPPORTED_FORMATS.includes(formatInput)) {
        throw createError({ statusCode: 400, statusMessage: "unsupported format" });
    }
    const requestedFmt: AudioFmt = formatInput || pickDefaultFormat(useSpeaches);
    // Only Speaches honours the requested format; Piper and the fallback always
    // produce WAV, so derive MIME type, extension and paths from the format
    // that is actually returned to keep them consistent with each other.
    const fmt: AudioFmt = useSpeaches ? requestedFmt : "wav";
    const mime = fmtToMime(fmt);
    const ext = fmtToExt(fmt);

    const radioQuality = simulateRadioQuality(level);
    const id = randomUUID();
    const timestamp = new Date().toISOString();
    const dateFolder = timestamp.slice(0, 10); // YYYY-MM-DD part of the ISO timestamp
    const baseDir = join(outDir(), dateFolder);
    const fileOut = join(baseDir, `${id}.${ext}`);
    const fileJson = join(baseDir, `${id}.json`);

    try {
        let audioBuffer: Buffer;
        let modelUsed: string;

        if (useSpeaches) {
            // Speaches (prefer compact: MP3, otherwise FLAC/WAV/PCM)
            const baseUrl = runtimeConfig.speachesBaseUrl || "";
            const model = runtimeConfig.speechModelId || "speaches-ai/piper-en_US-ryan-low";
            if (!baseUrl) {
                throw new Error("SPEACHES_BASE_URL not set");
            }
            audioBuffer = await speachesTTS(normalized, voice, model, fmt, baseUrl);
            modelUsed = model;
        } else if (usePiper) {
            // Local Piper (returns WAV)
            audioBuffer = await piperTTS(normalized, voice, runtimeConfig.piperPort);
            modelUsed = "piper-local";
        } else {
            // OpenAI (fallback)
            const tts = await normalize.audio.speech.create({
                model: TTS_MODEL,
                voice,
                // The speech endpoint's output-format parameter is named
                // `response_format`; `format` is not a recognised field.
                response_format: "wav",
                input: normalized,
                speed
            });
            audioBuffer = Buffer.from(await tts.arrayBuffer());
            modelUsed = TTS_MODEL;
        }

        // Optional persistence (currently disabled: this handler does not write
        // the files, so the paths reported below are nominal).
        // await ensureDir(baseDir);
        // await writeFile(fileOut, audioBuffer);
        const meta = {
            id,
            createdAt: timestamp,
            level,
            voice,
            speed,
            text: raw,
            normalized,
            radioQuality: radioQuality.description,
            tag: body?.tag || null,
            moduleId: body?.moduleId || null,
            lessonId: body?.lessonId || null,
            files: { audio: fileOut },
            model: modelUsed,
            format: mime
        };

        // await writeFile(fileJson, JSON.stringify(meta, null, 2), "utf-8");

        // Logging is best effort: a failure here must not fail the request.
        try {
            await TransmissionLog.create({
                user: user._id,
                role: "atc",
                channel: "say",
                direction: "outgoing",
                text: raw,
                normalized,
                metadata: {
                    level,
                    voice,
                    speed,
                    moduleId: body?.moduleId || null,
                    lessonId: body?.lessonId || null,
                    tag: body?.tag || null,
                    radioQuality: radioQuality.description,
                }
            })
        } catch (logError) {
            console.warn("Transmission logging failed", logError)
        }

        return {
            success: true,
            id,
            level,
            voice,
            speed,
            text: raw,
            normalized,
            radioQuality: radioQuality.description,
            audio: {
                mime,
                base64: audioBuffer.toString("base64"),
                size: audioBuffer.length,
                ext
            },
            stored: {
                audioPath: fileOut,
                jsonPath: fileJson,
                url: `/api/atc/audio/${dateFolder}/${id}.${ext}`
            },
            meta
        };
    } catch (err: unknown) {
        const reason = err instanceof Error && err.message ? err.message : String(err);
        throw createError({
            statusCode: 500,
            statusMessage: `TTS generation failed: ${reason}`
        });
    }
});