mirror of
https://github.com/OpenSquawk/OpenSquawk
synced 2026-06-29 13:20:09 +08:00
Whisper prompt seeding (per request):
- ptt.post.ts builds the prompt as generic ICAO bias + this state's expected
readback appended LAST (survives the 224-token truncation), in both raw token
form and spoken ICAO form via new radioSpeech.speakToken().
- pm.vue passes the expected phrase + active variable values; classroom.vue
passes the lesson's expected field values.
Per-field readback debug:
- sttMatch.matchTranscriptionToFields returns fields[] (matched/missing + which
view matched) plus normalized/denormalized transcription views.
- useRadioBackend types readback_report on the transmit response.
- pm.vue renders a "Readback check" panel in the right log rail; classroom.vue
renders per-field rows under the STT panel.
Radio-pronunciation fixes (radioSpeech.ts):
- callsign expander handles multi-letter suffixes (DLH6RK -> Lufthansa six Romeo
Kilo).
- toRadioSpeech now expands airports (EDDC -> Echo Delta Delta Charlie).
- bare altitudes >=1000 in a clearance context are spoken ("climb initially
5000" -> "climb initially five thousand feet"); speeds/headings untouched.
🤖 Generated with [Claude Code](https://claude.com/claude-code)
Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
345 lines
12 KiB
TypeScript
// Map a Whisper transcription back onto lesson readback fields.
//
// Whisper returns natural ATC speech ("lufthansa three five niner runway two
// five right squawk seven five zero zero"), but the lesson fields store the
// canonical written form ("DLH359", "25R", "7500"). Matching purely on the
// raw transcription misses most fields. We therefore build a *denormalized*
// view of the transcription where spoken digits/letters are folded back to
// their written tokens, and search both forms when looking for each field's
// expected value (or any alternative).

/** Spoken digit words mapped to their written digit. Besides plain English
 * it lists the variant spellings "wun", "too", "tree", "fower", "fife" and
 * "niner". Keys are lowercase. */
const SPOKEN_DIGIT: Record<string, string> = {
  zero: '0',
  one: '1', wun: '1',
  two: '2', too: '2',
  three: '3', tree: '3',
  four: '4', fower: '4',
  five: '5', fife: '5',
  six: '6',
  seven: '7',
  eight: '8',
  nine: '9', niner: '9',
}

/** Spelling-alphabet words mapped to their lowercase letter. Common spelling
 * variants ("alfa"/"alpha", "juliett"/"juliet", "whiskey"/"whisky") are both
 * listed; "xray" is stored without a separator. */
const SPOKEN_LETTER: Record<string, string> = {
  alfa: 'a', alpha: 'a',
  bravo: 'b',
  charlie: 'c',
  delta: 'd',
  echo: 'e',
  foxtrot: 'f',
  golf: 'g',
  hotel: 'h',
  india: 'i',
  juliett: 'j', juliet: 'j',
  kilo: 'k',
  lima: 'l',
  mike: 'm',
  november: 'n',
  oscar: 'o',
  papa: 'p',
  quebec: 'q',
  romeo: 'r',
  sierra: 's',
  tango: 't',
  uniform: 'u',
  victor: 'v',
  whiskey: 'w', whisky: 'w',
  xray: 'x',
  yankee: 'y',
  zulu: 'z',
}

/** Spoken runway side designators mapped to the single-letter suffix used in
 * written runway identifiers ("right" -> "r", as in "25r"). */
const RUNWAY_SUFFIX: Record<string, string> = {
  left: 'l',
  right: 'r',
  center: 'c', centre: 'c',
}

/** Scale words mapped to the run of zeros they contribute; the zeros are
 * glued onto the preceding digit later ("five thousand" -> "5 000" -> "5000"). */
const SCALE_WORDS: Record<string, string> = {
  hundred: '00',
  thousand: '000',
}

// Decorative words that should be stripped AFTER digit-collapse so they still
// act as a boundary while collapsing (otherwise "one one eight decimal seven"
// would fold to "1187" instead of the intended "118 7").
// Matches whole words only, so "and" inside "landing" is left alone.
const DECORATION_RE = /\b(decimal|point|dash|and)\b/g

/**
 * Canonicalize text for substring comparison.
 *
 * Lowercases the value, removes diacritics, replaces every run of characters
 * other than `a-z`, `0-9` and space with a single space, collapses whitespace
 * and trims.
 *
 * @param value Arbitrary text (a transcription or an expected field value).
 * @returns The normalized text; may be empty.
 */
export function normalizeForMatch(value: string): string {
  return value
    .toLowerCase()
    .normalize('NFD')
    // NFD splits "ü" into "u" + a combining mark. Drop the mark itself so the
    // base letter stays attached to its word ("münchen" -> "munchen") instead
    // of being turned into a space by the next step ("mu nchen").
    .replace(/[\u0300-\u036f]/g, '')
    .replace(/[^a-z0-9 ]+/g, ' ')
    .replace(/\s+/g, ' ')
    .trim()
}

/**
 * Convert spoken ATC English back to written tokens (digits, runway letters,
 * collapsed callsign codes). Returned text is also `normalizeForMatch`-safe.
 *
 * Steps, in order (the order matters — later rules rely on earlier ones):
 *  1. lowercase, strip diacritics, fold "x-ray"/"x ray" to "xray", replace
 *     everything except `a-z0-9 ` with spaces;
 *  2. map each token through the digit / letter / runway-suffix / scale tables;
 *  3. glue a single digit to a following scale ("5 000" -> "5000");
 *  4. collapse runs of single digits ("3 5 9" -> "359");
 *  5. glue a runway letter to its number ("25 r" -> "25r");
 *  6. glue single digit + single letter ("7 s" -> "7s");
 *  7. glue runs of 2–5 single letters when followed by digits or end of text
 *     ("d l h 359" -> "dlh 359");
 *  8. strip decoration words (decimal/point/dash/and).
 *
 * NOTE: step 3 only joins ONE digit to the zeros, so "one zero thousand"
 * yields "1 0000", not "10000".
 *
 * @param input Raw transcription text.
 * @returns The folded text, or `''` when nothing usable remains.
 */
export function denormalizeSpokenAtc(input: string): string {
  const cleaned = input
    .toLowerCase()
    .normalize('NFD')
    // Remove the combining marks NFD produced so accented letters stay inside
    // their word instead of becoming a space.
    .replace(/[\u0300-\u036f]/g, '')
    .replace(/x[- ]?ray/g, 'xray')
    .replace(/[^a-z0-9 ]+/g, ' ')
    .replace(/\s+/g, ' ')
    .trim()
  if (!cleaned) return ''

  // Tables are consulted in this priority order. Own-property lookup is
  // required: a plain `table[tok]` would also hit inherited members such as
  // "constructor" and push a function into the token list.
  const tables = [SPOKEN_DIGIT, SPOKEN_LETTER, RUNWAY_SUFFIX, SCALE_WORDS]
  const mapped = cleaned.split(' ').map((tok) => {
    for (const table of tables) {
      if (Object.prototype.hasOwnProperty.call(table, tok)) return table[tok]!
    }
    return tok
  })

  let result = mapped.join(' ')

  // "five thousand" → "5 000" → "5000"
  result = result.replace(/\b(\d)\s+(0{2,3})\b/g, (_m, d, z) => `${d}${z}`)

  // Collapse runs of single digits: "3 5 9" → "359"
  result = result.replace(/\b(\d(?:\s+\d)+)\b/g, m => m.replace(/\s+/g, ''))

  // Glue trailing runway letter to its numeric prefix: "25 r" → "25r"
  result = result.replace(/\b(\d{1,3})\s+([lrc])\b/g, '$1$2')

  // Glue single digit + single letter (SID/STAR suffix pattern): "7 s" → "7s"
  result = result.replace(/\b(\d)\s+([a-z])\b/g, '$1$2')

  // Glue runs of single letters into a callsign code, but only when followed
  // by digits or end-of-string so we don't crush ordinary words.
  // "d l h 359" → "dlh 359"
  result = result.replace(/\b([a-z](?:\s+[a-z]){1,4})\b(?=\s+\d|\s*$)/g, m => m.replace(/\s+/g, ''))

  // Strip decoration words now that the digit runs around them have been
  // collapsed and the boundary they provided is no longer needed.
  result = result.replace(DECORATION_RE, ' ')

  return result.replace(/\s+/g, ' ').trim()
}

/**
 * Levenshtein edit distance (insert / delete / substitute, each cost 1)
 * between two strings, compared per UTF-16 code unit.
 *
 * Uses a single rolling row of length `b.length + 1`.
 *
 * @returns The number of single-character edits turning `a` into `b`.
 */
function levenshtein(a: string, b: string): number {
  const m = a.length
  const n = b.length
  if (m === 0) return n
  if (n === 0) return m

  // row[j] holds the distance between the processed prefix of `a` and b[0..j).
  const row: number[] = Array.from({ length: n + 1 }, (_, j) => j)
  for (let i = 1; i <= m; i++) {
    // `diagonal` is the value of row[j - 1] from the previous iteration of i.
    let diagonal = row[0]!
    row[0] = i
    for (let j = 1; j <= n; j++) {
      const above = row[j]!
      const cost = a[i - 1] === b[j - 1] ? 0 : 1
      row[j] = Math.min(above + 1, row[j - 1]! + 1, diagonal + cost)
      diagonal = above
    }
  }
  return row[n]!
}

/**
 * Base edit-distance allowance for a needle of the given length:
 * 0 up to 12 characters, 1 up to 24, 2 up to 36, otherwise 3.
 */
function allowedDistance(length: number): number {
  if (length <= 12) return 0
  if (length <= 24) return 1
  if (length <= 36) return 2
  return 3
}

/**
 * Loose substring search using a sliding Levenshtein window.
 *
 * Returns true when `needle` occurs verbatim in `haystack`, or when some
 * window of `haystack` is within the tolerated edit distance of `needle`.
 * Windows are between `max(3, needle.length - 2)` and `needle.length + 3`
 * characters long; a haystack shorter than the minimum window can therefore
 * only match verbatim.
 *
 * @param haystack Text to search in.
 * @param needle Text to look for; an empty needle never matches.
 * @param extraTolerance Edits allowed on top of `allowedDistance(needle.length)`.
 */
export function fuzzyContains(haystack: string, needle: string, extraTolerance = 1): boolean {
  if (!needle || !haystack) return false
  if (haystack.includes(needle)) return true

  const tolerance = allowedDistance(needle.length) + extraTolerance
  const minLen = Math.max(3, needle.length - 2)
  const maxLen = needle.length + 3
  for (let start = 0; start + minLen <= haystack.length; start++) {
    // Never let a window run past the end of the haystack.
    const longest = Math.min(maxLen, haystack.length - start)
    for (let len = minLen; len <= longest; len++) {
      const window = haystack.slice(start, start + len)
      if (levenshtein(window, needle) <= tolerance) return true
    }
  }
  return false
}

/**
 * Split a callsign-style candidate ("lufthansa 359", "dlh 359", "baw27") into
 * its alphabetic prefix and digit run. At most one trailing letter after the
 * digits is tolerated and discarded.
 *
 * @returns `{ alpha, digits }` when the prefix has at least 3 characters and
 *   the digit run at least 2; otherwise `null`, and the caller falls back to
 *   whole-string matching.
 */
function splitCallsignParts(candidate: string): { alpha: string; digits: string } | null {
  // Accept either "alpha digits" with a space or "alphaDigits" glued together.
  // The two shapes are mutually exclusive (one requires whitespace before the
  // digits, the other forbids any), so trying them in sequence is safe.
  const shapes = [
    /^([a-z][a-z ]*?)\s+(\d{1,5})[a-z]?$/i,
    /^([a-z]{2,})(\d{1,5})[a-z]?$/i,
  ]
  for (const shape of shapes) {
    const hit = candidate.match(shape)
    if (!hit) continue
    const alpha = hit[1]!.trim()
    const digits = hit[2]!
    if (alpha.length >= 3 && digits.length >= 2) return { alpha, digits }
  }
  return null
}

/**
 * Callsign-tolerant match: try the candidate whole, then split it into the
 * airline prefix + flight number and require BOTH parts to appear (with fuzz)
 * in the haystack. Intended to rescue transcription errors such as a
 * misspelled airline name ("Loftansa", "Lufthana") or a telephony name split
 * across words ("Speed bird 27").
 *
 * @param haystack Normalized transcription view to search in.
 * @param candidate Normalized expected callsign (or alternative).
 */
function callsignMatches(haystack: string, candidate: string): boolean {
  if (!candidate || !haystack) return false
  if (candidate.length >= 3 && haystack.includes(candidate)) return true
  // Generous whole-string fuzzy (alphabetic typos in airline name).
  if (fuzzyContains(haystack, candidate, 3)) return true

  const parts = splitCallsignParts(candidate)
  if (!parts) return false
  const { alpha, digits } = parts

  // Digit part is the strong anchor — it must be present, allowing a single
  // typo on longer flight numbers.
  const digitsOk = haystack.includes(digits)
    || (digits.length >= 3 && fuzzyContains(haystack, digits, 1))
  if (!digitsOk) return false

  if (haystack.includes(alpha)) return true

  // Alpha part may be misspelled or split across whitespace, so also compare
  // whitespace-free forms of both sides.
  const alphaCompact = alpha.replace(/\s+/g, '')
  const haystackCompact = haystack.replace(/\s+/g, '')
  if (alphaCompact.length >= 4 && haystackCompact.includes(alphaCompact)) return true

  // Extra tolerance of ~25% of the prefix length (at least 2), added on top
  // of fuzzyContains' base allowance.
  const alphaTolerance = Math.max(2, Math.floor(alphaCompact.length / 4))
  return fuzzyContains(haystack, alpha, alphaTolerance)
    || fuzzyContains(haystackCompact, alphaCompact, alphaTolerance)
}

/** One lesson readback field to look for in a transcription. */
export interface SttFieldDef {
  /** Identifier used as the key in match results and reports. */
  key: string
  /** Canonical written value the pilot is expected to read back. */
  expected: string
  /** Other written forms that also count as a match for this field. */
  alternatives?: string[]
  /** When true, the tolerant callsign matcher is tried as a last resort. */
  isCallsign?: boolean
}

/** Per-field diagnostic describing whether and how a field was matched. */
export interface SttFieldReport {
  /** Key of the field this report belongs to. */
  key: string
  /** The field's expected value, trimmed. */
  expected: string
  /** Whether any candidate for the field was found in the transcription. */
  matched: boolean
  /** The normalized candidate form that matched the transcription, or null. */
  matchedVia: string | null
  /** Which transcription view the match landed in. */
  view: 'raw' | 'spoken' | 'callsign' | null
}

/** Outcome of matching one transcription against a set of fields. */
export interface SttMatchResult {
  /** Matched fields only: field key → the field's trimmed expected value. */
  matches: Record<string, string>
  /** Number of fields that matched. */
  filled: number
  /** Number of fields that were passed in. */
  total: number
  /** Normalized raw transcription (what Whisper returned, cleaned). */
  normalized: string
  /** Spoken→written folded view ("two five right" → "25r"). */
  denormalized: string
  /** Per-field diagnostic, in the original field order, for the comm log. */
  fields: SttFieldReport[]
}

/** Backslash-escape every regular-expression metacharacter in `value` so it
 * can be embedded in a `RegExp` source and match literally. */
function escapeRegex(value: string): string {
  return value.replace(/[.*+?^${}()|[\]\\]/g, '\\$&')
}

/**
 * Match a candidate string in the haystack. Short (1–2 char) candidates only
 * hit when they appear as a standalone whitespace-delimited token — this
 * prevents the digit "5" from matching anywhere inside a callsign like "359".
 * Candidates of 3+ characters use a plain substring test.
 *
 * @returns false when either argument is empty.
 */
function candidateMatches(haystack: string, candidate: string): boolean {
  if (!candidate || !haystack) return false
  if (candidate.length >= 3) return haystack.includes(candidate)
  const standalone = new RegExp(`(^|\\s)${escapeRegex(candidate)}(\\s|$)`)
  return standalone.test(haystack)
}

/**
 * Return a new array with the fields ordered by descending length of their
 * `expected` value (a missing value counts as length 0). Fields of equal
 * length keep their original relative order. The input array is not mutated.
 */
function pickLongestExpected(fields: SttFieldDef[]): SttFieldDef[] {
  // Longer expected values are more discriminating, so they are visited first.
  return fields
    .map((field, index) => ({ field, index, length: (field.expected || '').length }))
    .sort((a, b) => b.length - a.length || a.index - b.index)
    .map(entry => entry.field)
}

/**
 * Match a transcription against the lesson's readback fields.
 *
 * Two views of the transcription are built: the normalized raw text and the
 * spoken→written folded text. For every field, each candidate (the expected
 * value followed by its alternatives, normalized and de-duplicated) is tried
 * against the raw view, then the folded view, then — for callsign fields with
 * a candidate of 4+ characters — the tolerant callsign matcher on both views.
 * The first hit wins and is recorded in the field's report.
 *
 * Fields are visited longest-expected-first, but each field is matched
 * independently: a match does not consume text from the transcription.
 *
 * @param transcription Raw speech-to-text output.
 * @param fields Field definitions; fields with a blank `expected` never match.
 * @returns Matched values, counts, both transcription views, and one report
 *   per input field in the original field order.
 */
export function matchTranscriptionToFields(
  transcription: string,
  fields: SttFieldDef[],
): SttMatchResult {
  const normalized = normalizeForMatch(transcription)
  const denormalized = normalizeForMatch(denormalizeSpokenAtc(transcription))
  const matches: Record<string, string> = {}
  // Per-field diagnostic keyed by field.key (output in original order below).
  const reportByKey = new Map<string, SttFieldReport>()
  let filled = 0

  for (const field of pickLongestExpected(fields)) {
    const expectedRaw = (field.expected || '').trim()
    const report: SttFieldReport = {
      key: field.key,
      expected: expectedRaw,
      matched: false,
      matchedVia: null,
      view: null,
    }
    reportByKey.set(field.key, report)
    if (!expectedRaw) continue

    // normalizeForMatch already trims, so blank entries collapse to '' and
    // are dropped; the Set removes candidates that normalize identically.
    const candidates = Array.from(new Set(
      [expectedRaw, ...(field.alternatives || [])]
        .map(c => normalizeForMatch(c || ''))
        .filter(c => c.length > 0),
    ))

    for (const cand of candidates) {
      let view: SttFieldReport['view'] = null
      if (candidateMatches(normalized, cand)) {
        view = 'raw'
      } else if (candidateMatches(denormalized, cand)) {
        view = 'spoken'
      } else if (field.isCallsign && cand.length >= 4
        && (callsignMatches(normalized, cand) || callsignMatches(denormalized, cand))) {
        view = 'callsign'
      }
      if (view !== null) {
        report.matched = true
        report.matchedVia = cand
        report.view = view
        break
      }
    }

    if (report.matched) {
      matches[field.key] = expectedRaw
      filled++
    }
  }

  return {
    matches,
    filled,
    total: fields.length,
    normalized,
    denormalized,
    fields: fields.map(f => reportByKey.get(f.key) ?? {
      key: f.key, expected: (f.expected || '').trim(), matched: false, matchedVia: null, view: null,
    }),
  }
}

/**
 * Heuristic: does this field key (or its optional label) denote a callsign?
 *
 * All checks are case-insensitive. True when key + label contain the
 * standalone word "callsign", "call sign" or "callup", when the key ends in
 * "callsign" (e.g. "pilotCallsign"), or when the key contains "-callsign"
 * followed by a word boundary.
 *
 * @param key Field key.
 * @param label Optional human-readable label for the field.
 */
export function looksLikeCallsignKey(key: string, label?: string): boolean {
  // Lower-case the key once and use it for every check; testing the raw key
  // would miss camelCase / upper-case keys such as "pilotCallsign".
  const lowerKey = key.toLowerCase()
  const probe = `${lowerKey} ${(label || '').toLowerCase()}`
  return /\b(callsign|call sign|callup)\b/.test(probe)
    || /callsign$/.test(lowerKey)
    || /-callsign\b/.test(lowerKey)
}