mirror of
https://github.com/OpenSquawk/OpenSquawk
synced 2026-06-27 19:05:48 +08:00
Usage tracking:
- new UsageEvent collection records every STT/TTS/LLM call per user with provider, model, volume (audio seconds, characters, tokens) and an estimated USD cost; self-hosted providers (Speaches/Piper) and cache hits record at $0
- pricing table for whisper-1, tts-1, gpt-5-nano & co. in server/utils/usage.ts
- weekly KPI mail gains an "AI-Nutzung & Kosten" section: weekly and rolling 30-day cost, per-kind breakdown, top 5 users by cost
- quota alert mail when rolling 30-day cost exceeds USAGE_ALERT_USD (default $5), at most once per calendar month (UsageAlertDelivery)

Hardening:
- /api/atc/say now requires an authenticated session (middleware exemption removed); useFlightLabAudio sends the bearer token
- /api/service/tools/latency requires auth (was a public LLM endpoint)
- per-user rate limits: PTT 20/min, say 60/min, latency 5/min
- cron endpoints (waitlist-drip, weekly-kpi-report) require a shared secret via ?secret= or x-cron-secret (CRON_SECRET, falls back to KPI_CRON_SECRET); allowed with a warning while unset so existing deployments keep working
- PTT records the actual transcribed audio duration for billing accuracy

Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
66 lines
1.8 KiB
TypeScript
66 lines
1.8 KiB
TypeScript
import type { H3Event } from 'h3'
|
|
import { createError } from 'h3'
|
|
|
|
/** Counter state for one entry of the `windows` map. */
interface WindowEntry {
  /** Timestamp (same clock as the `now` passed to `cleanup`) at which the window opened. */
  windowStart: number
  /** Number of requests counted since `windowStart`. */
  count: number
}

// In-memory fixed-window limiter. Per process — good enough for a single
// Coolify instance; swap for a shared store if the app ever scales out.
const windows = new Map<string, WindowEntry>()

// Minimum spacing between two sweeps of `windows`.
const CLEANUP_INTERVAL_MS = 10 * 60 * 1000
let lastCleanup = Date.now()

// Longest window length any caller has passed so far. Entries do not record
// which window length they belong to, so expiry is measured against the
// longest one seen; that can never evict a counter that is still live.
let longestWindowMs = 0

/**
 * Drops stale entries from `windows`, at most once per `CLEANUP_INTERVAL_MS`.
 *
 * An entry is stale once it is older than twice the longest window length
 * seen so far. Using the longest length (instead of the length of whichever
 * call happens to trigger the sweep) keeps a short-window caller from
 * deleting the counters of a long-window caller and resetting its limit early.
 *
 * @param now      Current timestamp in milliseconds.
 * @param windowMs Window length used by the calling limiter, in milliseconds.
 */
function cleanup(now: number, windowMs: number): void {
  // Record the window length before the throttle check so that it is known
  // even when this particular call does not sweep.
  if (windowMs > longestWindowMs) longestWindowMs = windowMs

  if (now - lastCleanup < CLEANUP_INTERVAL_MS) return
  lastCleanup = now

  const maxAgeMs = longestWindowMs * 2
  for (const [key, entry] of windows) {
    if (now - entry.windowStart > maxAgeMs) {
      windows.delete(key)
    }
  }
}
|
|
|
|
/**
 * Best-effort client address for use as a rate-limit key.
 *
 * Returns the first non-empty entry of the `x-forwarded-for` request header.
 * When the header is absent or contains only blank entries, falls back to
 * the socket's remote address, and finally to the literal `'unknown'`.
 *
 * NOTE(review): `x-forwarded-for` is supplied by the client unless a trusted
 * proxy overwrites it — confirm the deployment always sits behind one.
 *
 * @param event Incoming request event.
 * @returns A non-empty address string, or `'unknown'`.
 */
export function getClientIp(event: H3Event): string {
  const forwarded = event.node.req.headers['x-forwarded-for']
  const headerValues = Array.isArray(forwarded) ? forwarded : [forwarded ?? '']

  for (const headerValue of headerValues) {
    for (const hop of headerValue.split(',')) {
      const address = hop.trim()
      // Skip blank segments such as the leading one in ", 1.2.3.4" so that an
      // empty string is never handed back as the caller's identity.
      if (address) return address
    }
  }

  // `||` on purpose: an empty remote address must also map to 'unknown'.
  return event.node.req.socket?.remoteAddress || 'unknown'
}
|
|
|
|
/**
 * Counts one request against `bucket:key` and throws 429 once the key has
 * made more than `limit` requests within the current window of `windowMs`.
 * Use a user id as key where available, client IP otherwise.
 *
 * Windows are fixed: the first request for a key (or the first one arriving
 * after the previous window has run out) opens a new window starting at the
 * current time. Every request is checked against `limit`, including the one
 * that opens the window, so a `limit` of 0 rejects all requests.
 *
 * @param event    Request event; its response receives a `Retry-After` header
 *                 (whole seconds, at least 1) when the request is rejected.
 * @param bucket   Prefix of the counter key; different buckets count independently.
 * @param key      Identity of the caller within the bucket.
 * @param limit    Maximum number of requests allowed per window.
 * @param windowMs Window length in milliseconds; defaults to one minute.
 * @throws The error built by `createError` with status code 429 when the
 *         limit is exceeded.
 */
export function enforceRateLimit(
  event: H3Event,
  bucket: string,
  key: string,
  limit: number,
  windowMs = 60_000,
): void {
  const now = Date.now()
  cleanup(now, windowMs)

  const mapKey = `${bucket}:${key}`
  let entry = windows.get(mapKey)

  if (!entry || now - entry.windowStart >= windowMs) {
    // Open a fresh window at zero and let the shared path below count this
    // request, so the opening request is subject to the limit check as well.
    entry = { windowStart: now, count: 0 }
    windows.set(mapKey, entry)
  }

  entry.count += 1
  if (entry.count > limit) {
    // Seconds until the current window ends, rounded up and never below 1.
    const retryAfter = Math.ceil((entry.windowStart + windowMs - now) / 1000)
    event.node.res.setHeader('Retry-After', String(Math.max(retryAfter, 1)))
    throw createError({
      statusCode: 429,
      statusMessage: 'Too many requests — slow down and try again shortly.',
    })
  }
}
|