Files
OpenSquawk/server/utils/rateLimit.ts
leubeem f8fdd8bc79 feat(server): per-user AI usage tracking, cost alerting, and endpoint hardening
Usage tracking:
- new UsageEvent collection records every STT/TTS/LLM call per user with
  provider, model, volume (audio seconds, characters, tokens) and an
  estimated USD cost; self-hosted providers (Speaches/Piper) and cache
  hits record at $0
- pricing table for whisper-1, tts-1, gpt-5-nano & co. in server/utils/usage.ts
- weekly KPI mail gains an "AI-Nutzung & Kosten" section: weekly and
  rolling 30-day cost, per-kind breakdown, top 5 users by cost
- quota alert mail when rolling 30-day cost exceeds USAGE_ALERT_USD
  (default $5), at most once per calendar month (UsageAlertDelivery)

Hardening:
- /api/atc/say now requires an authenticated session (middleware
  exemption removed); useFlightLabAudio sends the bearer token
- /api/service/tools/latency requires auth (was a public LLM endpoint)
- per-user rate limits: PTT 20/min, say 60/min, latency 5/min
- cron endpoints (waitlist-drip, weekly-kpi-report) require a shared
  secret via ?secret= or x-cron-secret (CRON_SECRET, falls back to
  KPI_CRON_SECRET); allowed with a warning while unset so existing
  deployments keep working
- PTT records the actual transcribed audio duration for billing accuracy

Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
2026-06-10 23:17:03 +02:00

66 lines
1.8 KiB
TypeScript

import type { H3Event } from 'h3'
import { createError } from 'h3'
/** Counter state for one `bucket:key` pair within its current fixed window. */
interface WindowEntry {
  /** Timestamp (from `Date.now()`) at which the current window opened. */
  windowStart: number
  /** Number of requests counted since `windowStart`. */
  count: number
}

// In-memory fixed-window limiter. Per process — good enough for a single
// Coolify instance; swap for a shared store if the app ever scales out.
const windows = new Map<string, WindowEntry>()

/** Minimum time between two eviction sweeps over `windows`. */
const CLEANUP_INTERVAL_MS = 10 * 60 * 1000

/** Timestamp of the most recent sweep (module load time until the first one). */
let lastCleanup = Date.now()

/**
 * Largest `windowMs` passed to `cleanup` so far. Entries do not record their
 * own window length, so eviction has to be conservative enough for the
 * longest window any caller uses.
 */
let largestWindowMs = 0

/**
 * Evicts stale entries from `windows`, at most once per `CLEANUP_INTERVAL_MS`.
 *
 * An entry is dropped once it is older than twice the largest window length
 * seen so far. Using the largest window (rather than the `windowMs` of
 * whichever call happens to trigger the sweep) keeps a short-window caller
 * from deleting still-active counters that belong to a longer-window bucket.
 *
 * @param now - Current timestamp in milliseconds.
 * @param windowMs - Window length used by the calling limiter check.
 */
function cleanup(now: number, windowMs: number): void {
  // Record the window length on every call, even when the sweep is skipped,
  // so it is known before the caller stores an entry with that window.
  if (windowMs > largestWindowMs) largestWindowMs = windowMs

  if (now - lastCleanup < CLEANUP_INTERVAL_MS) return
  lastCleanup = now

  const maxAgeMs = largestWindowMs * 2
  for (const [key, entry] of windows) {
    if (now - entry.windowStart > maxAgeMs) {
      windows.delete(key)
    }
  }
}
/**
 * Best-effort client address for the request.
 *
 * Returns the first non-empty entry of the `X-Forwarded-For` header when one
 * is present, otherwise the socket's remote address, otherwise `'unknown'`.
 *
 * NOTE(review): `X-Forwarded-For` is client-controlled unless a trusted proxy
 * in front of the app overwrites it — confirm the deployment does so before
 * relying on this value for anything security-sensitive.
 *
 * @param event - The h3 event for the current request.
 * @returns The resolved address; never an empty string.
 */
export function getClientIp(event: H3Event): string {
  const header = event.node.req.headers['x-forwarded-for']
  // Header values may be typed as string | string[]; flatten to one list.
  const raw = Array.isArray(header) ? header.join(',') : header

  if (typeof raw === 'string') {
    // Skip empty tokens (e.g. a leading comma) so a malformed header never
    // yields '' as the client address.
    const firstHop = raw
      .split(',')
      .map((part) => part.trim())
      .find((part) => part.length > 0)
    if (firstHop) return firstHop
  }

  return event.node.req.socket?.remoteAddress || 'unknown'
}
/**
 * Fixed-window rate limit check; call it once per incoming request.
 *
 * Counts the request against `bucket:key` and throws a 429 error when the
 * count for the current window exceeds `limit`. Every request is checked,
 * including the one that opens a new window, so a `limit` below 1 rejects
 * all requests. Use a user id as key where available, client IP otherwise.
 *
 * @param event - The h3 event; used to set the `Retry-After` response header.
 * @param bucket - Name of the limiter (kept separate from other buckets).
 * @param key - Identity being limited within the bucket.
 * @param limit - Maximum number of requests allowed per window.
 * @param windowMs - Window length in milliseconds (default 60 000).
 * @throws An h3 error with `statusCode` 429 when the limit is exceeded.
 */
export function enforceRateLimit(
  event: H3Event,
  bucket: string,
  key: string,
  limit: number,
  windowMs = 60_000,
): void {
  const now = Date.now()
  cleanup(now, windowMs)

  const mapKey = `${bucket}:${key}`
  let entry = windows.get(mapKey)
  if (!entry || now - entry.windowStart >= windowMs) {
    // No window yet, or the previous one has elapsed: open a fresh window.
    // It starts at 0 so the request below is counted and checked like any
    // other instead of bypassing the limit.
    entry = { windowStart: now, count: 0 }
    windows.set(mapKey, entry)
  }

  entry.count += 1
  if (entry.count > limit) {
    // Seconds until the current window ends, rounded up and never below 1.
    const retryAfter = Math.ceil((entry.windowStart + windowMs - now) / 1000)
    event.node.res.setHeader('Retry-After', String(Math.max(retryAfter, 1)))
    throw createError({
      statusCode: 429,
      statusMessage: 'Too many requests — slow down and try again shortly.',
    })
  }
}