Files
OpenSquawk/server/utils/usageAlert.ts
leubeem f8fdd8bc79 feat(server): per-user AI usage tracking, cost alerting, and endpoint hardening
Usage tracking:
- new UsageEvent collection records every STT/TTS/LLM call per user with
  provider, model, volume (audio seconds, characters, tokens) and an
  estimated USD cost; self-hosted providers (Speaches/Piper) and cache
  hits record at $0
- pricing table for whisper-1, tts-1, gpt-5-nano & co. in server/utils/usage.ts
- weekly KPI mail gains an "AI-Nutzung & Kosten" section: weekly and
  rolling 30-day cost, per-kind breakdown, top 5 users by cost
- quota alert mail when rolling 30-day cost exceeds USAGE_ALERT_USD
  (default $5), at most once per calendar month (UsageAlertDelivery)

Hardening:
- /api/atc/say now requires an authenticated session (middleware
  exemption removed); useFlightLabAudio sends the bearer token
- /api/service/tools/latency requires auth (was a public LLM endpoint)
- per-user rate limits: PTT 20/min, say 60/min, latency 5/min
- cron endpoints (waitlist-drip, weekly-kpi-report) require a shared
  secret via ?secret= or x-cron-secret (CRON_SECRET, falls back to
  KPI_CRON_SECRET); while no secret is configured, requests are still
  allowed with a warning so existing deployments keep working
- PTT records the actual transcribed audio duration for billing accuracy

Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
2026-06-10 23:17:03 +02:00

74 lines
2.7 KiB
TypeScript

import { UsageAlertDelivery } from '../models/UsageAlertDelivery'
import { sendMail } from './notifications'
import { getRollingCostUsd, summarizeUsage } from './usage'
// Fallback alert threshold in USD; getThresholdUsd() returns it when
// USAGE_ALERT_USD does not parse to a positive finite number.
const DEFAULT_THRESHOLD_USD = 5
// Fallback recipient of the alert mail, used when KPI_EMAIL_TO is unset or empty.
const DEFAULT_RECIPIENT = 'opensquawk-kpi@faktorxmensch.com'
/**
 * Resolves the cost threshold (in USD) at which the usage alert fires.
 *
 * Reads USAGE_ALERT_USD from the environment and parses it with
 * Number.parseFloat, so a leading numeric prefix such as "7 USD" is accepted.
 * Anything that does not yield a positive finite number (unset, empty,
 * non-numeric, zero, negative, Infinity) falls back to DEFAULT_THRESHOLD_USD.
 *
 * @returns {number} The configured threshold, or the default.
 */
function getThresholdUsd() {
  const raw = process.env.USAGE_ALERT_USD || ''
  const candidate = Number.parseFloat(raw)
  if (!Number.isFinite(candidate) || candidate <= 0) {
    return DEFAULT_THRESHOLD_USD
  }
  return candidate
}
/**
 * Cron-safe quota check for the estimated AI spend.
 *
 * Compares the rolling 30-day cost against the threshold from
 * getThresholdUsd() (USAGE_ALERT_USD, default $5). When the cost is not below
 * the threshold and no UsageAlertDelivery record exists for the current UTC
 * month ("YYYY-MM"), a summary mail is sent to KPI_EMAIL_TO (or the default
 * recipient) and a delivery record is created, so at most one alert goes out
 * per calendar month.
 *
 * Never throws: any failure is logged with console.warn and reported in the
 * returned object instead.
 *
 * @param now Reference time for the 30-day window and the month key.
 * @returns One of:
 *   - `{ sent: false, costUsd, thresholdUsd }` when the cost is below the threshold
 *   - `{ sent: false, skipped: 'already-sent', costUsd, thresholdUsd }` when
 *     this month already has a delivery record
 *   - `{ sent: true, mailAccepted, costUsd, thresholdUsd }` after mailing;
 *     `mailAccepted` is whatever sendMail resolved to
 *   - `{ sent: false, error }` when anything failed
 */
export async function maybeSendUsageQuotaAlert(now = new Date()) {
  try {
    const thresholdUsd = getThresholdUsd()
    const costUsd = await getRollingCostUsd(30, now)
    if (costUsd < thresholdUsd) {
      return { sent: false, costUsd, thresholdUsd }
    }

    // toISOString() is UTC, so the month boundary is the UTC calendar month.
    const monthKey = now.toISOString().slice(0, 7)
    if (await UsageAlertDelivery.exists({ monthKey })) {
      return { sent: false, skipped: 'already-sent', costUsd, thresholdUsd }
    }

    const recipient = process.env.KPI_EMAIL_TO || DEFAULT_RECIPIENT

    // Same 30-day window as the cost check above, expressed in milliseconds.
    const windowMs = 30 * 24 * 60 * 60 * 1000
    const summary = await summarizeUsage(new Date(now.getTime() - windowMs), now)

    let topUsers = '- keine User zugeordnet'
    if (summary.topUsers.length) {
      topUsers = summary.topUsers
        .map((u) => `- ${u.email}: $${u.costUsd.toFixed(4)} (${u.events} Requests)`)
        .join('\n')
    }

    // Build the mail body line by line; empty strings become blank lines.
    const headline = `Die geschätzten AI-Kosten der letzten 30 Tage liegen bei $${costUsd.toFixed(4)} und haben die Schwelle von $${thresholdUsd.toFixed(2)} überschritten.`
    const sttLine = `STT: ${Math.round(summary.sttSeconds / 60)} Minuten Audio ($${summary.byKind.stt.costUsd.toFixed(4)})`
    const ttsLine = `TTS: ${summary.ttsCharacters} Zeichen ($${summary.byKind.tts.costUsd.toFixed(4)})`
    const llmLine = `LLM: ${summary.llmInputTokens} in / ${summary.llmOutputTokens} out Tokens ($${summary.byKind.llm.costUsd.toFixed(4)})`
    const text = [
      headline,
      '',
      sttLine,
      ttsLine,
      llmLine,
      '',
      'Top User nach Kosten:',
      topUsers,
      '',
      'Es wird maximal eine Warnung pro Kalendermonat verschickt.',
    ].join('\n')

    const subject = `OpenSquawk Kosten-Alarm: $${costUsd.toFixed(2)} in 30 Tagen (Schwelle $${thresholdUsd.toFixed(2)})`
    const mailAccepted = await sendMail({ to: recipient, subject, text })

    // The delivery is recorded after the send attempt, whatever sendMail returned.
    await UsageAlertDelivery.create({
      monthKey,
      thresholdUsd,
      costUsd,
      recipient,
      sentAt: now,
    })

    return { sent: true, mailAccepted, costUsd, thresholdUsd }
  } catch (error) {
    console.warn('[usage-alert] Quota alert check failed', error)
    return { sent: false, error: String(error) }
  }
}