// File: fertig-classic-games/server/scripts/genMiniCrossword.js
// (repository listing metadata: 340 lines, 12 KiB, JavaScript)

// Offline generator for Mini Crossword grids.
//
// Produces crossword grids whose *fill* is non-symmetric (across words differ
// from down words) at three sizes — easy 5x5, medium 6x6, hard 7x7 — by filling
// block templates against a common-word subset of the ENABLE word list with a
// backtracking solver (MRV slot ordering + forward checking). It emits a
// candidate JSON whose `across`/`down` clue arrays are blank placeholders, plus
// reference answers (`_answersAcross`/`_answersDown`), so clues can be
// hand-authored afterwards.
//
// Usage:
// node server/scripts/genMiniCrossword.js [perTier] [outFile]
// node server/scripts/genMiniCrossword.js 20 server/data/crosswords/_generated.json
//
// All runs are length >= 3. Template block layouts are rotationally symmetric
// (the word fill is not). Each puzzle uses every word at most once.
import fs from 'node:fs';
import path from 'node:path';
import { fileURLToPath } from 'node:url';
// Directory containing this script, derived from import.meta.url.
const __dirname = path.dirname(fileURLToPath(import.meta.url));
// Common-word subset (top frequency ∩ ENABLE) so fills stay clue-able rather
// than picking obscure Scrabble words. Built once via buildCommonWords.js.
const WORDLIST_PATH = path.join(__dirname, '../data/wordlists/common.txt');
// Grid cell markers: BLOCK is a black square; EMPTY is a fillable cell that has
// not been given a letter yet.
const BLOCK = '#';
const EMPTY = '.';
// ── Block templates ────────────────────────────────────────────────────────────
// '.' = fillable cell, '#' = black square. Each is rotationally symmetric with no
// run shorter than 3. Blocks make the fill *easier* (shorter, more-flexible runs)
// while giving a real-crossword shape. More than one template per tier yields
// more grid variety.
// Block layouts keyed by difficulty tier. Each template is an array of row
// strings made of EMPTY ('.') and BLOCK ('#') characters; main() cycles through
// a tier's templates from one generation attempt to the next.
const TEMPLATES = {
easy: [
// Open 5x5: a double word square — 5 across + 5 down, all distinct words.
[
'.....',
'.....',
'.....',
'.....',
'.....',
],
// 5x5 with symmetric corner nicks (len-4 corners + len-5 spines).
[
'#....',
'.....',
'.....',
'.....',
'....#',
],
],
medium: [
// 6x6 corner-cut: len-4 corners, len-6 spines.
[
'#....#',
'......',
'......',
'......',
'......',
'#....#',
],
],
hard: [
// 7x7 with symmetric corner blocks + center split. Runs of length 3, 5, 7.
[
'##...##',
'#.....#',
'.......',
'...#...',
'.......',
'#.....#',
'##...##',
],
],
};
// Side length of the square grid for each tier; read by main() when printing
// the per-tier summary line.
const TIER_SIZE = { easy: 5, medium: 6, hard: 7 };
// ── Word list ───────────────────────────────────────────────────────────────────
// Reads a word list (one word per line) and builds the lookup structures used by
// the solver.
//
// wordlistPath: file to read; defaults to WORDLIST_PATH.
//
// Returns { byLen, index, sets }:
//   byLen: Map<len, string[]> — upper-cased words grouped by length, in file order.
//   index: Map<len, Array<Map<letter, Set<idx>>>> — for each length and letter
//          position, the indices (into byLen.get(len)) of the words that have
//          that letter there. The index lets us find words matching a partial
//          pattern by intersecting the candidate sets of the already-fixed
//          letter positions.
//   sets:  Map<len, Set<string>> — membership sets for "is this a word" checks.
//
// Only purely alphabetic words of length 3 to 7 are kept. Repeated entries (for
// example the same word in a different letter case) are stored once, so a word
// is never offered twice as a candidate.
function loadWords(wordlistPath = WORDLIST_PATH) {
  const raw = fs.readFileSync(wordlistPath, 'utf8');
  const byLen = new Map();
  const sets = new Map();
  for (const line of raw.split('\n')) {
    // trim() also strips a trailing '\r' from CRLF files.
    const w = line.trim().toUpperCase();
    if (w.length < 3 || w.length > 7 || !/^[A-Z]+$/.test(w)) continue;
    let known = sets.get(w.length);
    if (!known) {
      known = new Set();
      sets.set(w.length, known);
      byLen.set(w.length, []);
    }
    // Skip duplicates so byLen, index and sets all describe the same word once.
    if (known.has(w)) continue;
    known.add(w);
    byLen.get(w.length).push(w);
  }
  const index = new Map();
  for (const [len, words] of byLen) {
    const positions = Array.from({ length: len }, () => new Map());
    words.forEach((w, i) => {
      for (let p = 0; p < len; p++) {
        const byLetter = positions[p];
        const ch = w[p];
        if (!byLetter.has(ch)) byLetter.set(ch, new Set());
        byLetter.get(ch).add(i);
      }
    });
    index.set(len, positions);
  }
  return { byLen, index, sets };
}
// Words of `len` whose letters match `pattern` (array of letter|null), excluding
// any already in `used`. Intersects the smallest fixed-position sets first.
//
// The first argument is the structure returned by loadWords (only `byLen` and
// `index` are read). Returns an empty array when no word of that length exists,
// even if the pattern has fixed letters. The result is always a fresh array, so
// callers may reorder it in place without touching the word list.
function candidates({ byLen, index }, len, pattern, used) {
  const words = byLen.get(len) ?? [];
  // Nothing of this length: bail out before touching the (absent) index entry.
  if (words.length === 0) return [];

  const fixed = [];
  for (let p = 0; p < len; p++) {
    if (pattern[p]) fixed.push([p, pattern[p]]);
  }
  if (fixed.length === 0) {
    return words.filter((w) => !used.has(w));
  }

  const positions = index.get(len);
  const sets = fixed.map(([p, ch]) => positions[p].get(ch));
  // A fixed letter that no word has at that position rules everything out.
  if (sets.some((s) => !s || s.size === 0)) return [];

  // Walk the smallest set and test membership in the others.
  sets.sort((a, b) => a.size - b.size);
  const [smallest, ...others] = sets;
  const out = [];
  for (const i of smallest) {
    if (!others.every((s) => s.has(i))) continue;
    const w = words[i];
    if (!used.has(w)) out.push(w);
  }
  return out;
}
// ── Slot extraction (matches the engine's numbering convention) ─────────────────
// Turns a template (array of row strings) into a mutable grid: one array of
// single-character cells per row.
function buildGrid(template) {
  const grid = [];
  for (const row of template) {
    grid.push(row.split(''));
  }
  return grid;
}
// True when (r, c) lies inside the grid and is not a black square.
function isCell(grid, r, c) {
  const rowInBounds = r >= 0 && r < grid.length;
  if (!rowInBounds) return false;
  const colInBounds = c >= 0 && c < grid[0].length;
  if (!colInBounds) return false;
  return grid[r][c] !== BLOCK;
}
// Scans the grid in reading order and numbers every cell that begins an across
// and/or a down run (a cell starting both shares one number). Returns all across
// slots followed by all down slots; each slot is { number, row, col, len, cells }
// with `cells` listing the run's [r, c] pairs in order.
function deriveSlots(grid) {
  const acrossSlots = [];
  const downSlots = [];

  // Collects the cells of the run that starts at (r, c) and steps by (dr, dc).
  const collectRun = (r, c, dr, dc) => {
    const cells = [];
    for (let rr = r, cc = c; isCell(grid, rr, cc); rr += dr, cc += dc) {
      cells.push([rr, cc]);
    }
    return cells;
  };

  let counter = 0;
  for (let r = 0; r < grid.length; r += 1) {
    for (let c = 0; c < grid[0].length; c += 1) {
      if (!isCell(grid, r, c)) continue;
      // A run starts here when the previous cell is closed and the next is open.
      const opensAcross = !isCell(grid, r, c - 1) && isCell(grid, r, c + 1);
      const opensDown = !isCell(grid, r - 1, c) && isCell(grid, r + 1, c);
      if (opensAcross || opensDown) {
        counter += 1;
        if (opensAcross) {
          const cells = collectRun(r, c, 0, 1);
          acrossSlots.push({ number: counter, row: r, col: c, len: cells.length, cells });
        }
        if (opensDown) {
          const cells = collectRun(r, c, 1, 0);
          downSlots.push({ number: counter, row: r, col: c, len: cells.length, cells });
        }
      }
    }
  }
  // Combined slot list for the solver.
  return acrossSlots.concat(downSlots);
}
// ── Backtracking solver ─────────────────────────────────────────────────────────
// Current contents of a slot as a pattern: the letter for each filled cell and
// null for each cell that is still EMPTY.
function patternFor(grid, slot) {
  const pattern = [];
  for (const [r, c] of slot.cells) {
    const ch = grid[r][c];
    pattern.push(ch === EMPTY ? null : ch);
  }
  return pattern;
}
// True when none of the slot's cells is still EMPTY.
function isFilled(grid, slot) {
  const hasGap = slot.cells.some(([r, c]) => grid[r][c] === EMPTY);
  return !hasGap;
}
// Reads the slot's cells in order and returns them joined into one string.
function wordAt(grid, slot) {
  const letters = Array.from(slot.cells, ([r, c]) => grid[r][c]);
  return letters.join('');
}
// Writes `word` into the slot's cells, one letter per cell, and returns the
// previous contents of those cells so the placement can be undone later.
function placeWord(grid, slot, word) {
  const { cells } = slot;
  // Snapshot first, then overwrite.
  const snapshot = [];
  for (const [r, c] of cells) {
    snapshot.push(grid[r][c]);
  }
  for (let i = 0; i < cells.length; i += 1) {
    const [r, c] = cells[i];
    grid[r][c] = word[i];
  }
  return snapshot;
}
// Restores the slot's cells to the values in `prev` (as returned by placeWord).
function unplace(grid, slot, prev) {
  let i = 0;
  for (const [r, c] of slot.cells) {
    grid[r][c] = prev[i];
    i += 1;
  }
}
// Backtracking fill: gives every slot a distinct word from the list. Each slot is
// assigned explicitly, including slots whose letters are already fully fixed by
// crossing words — such a slot has exactly one candidate when its spelling is a
// listed, unused word and none otherwise, which is how bad crossings get pruned.
//
// Mutates `grid`, `used` (words placed so far) and `assigned` (slots resolved so
// far) in place; on failure each placement made here is undone again. `deadline`
// is an epoch-ms cutoff checked on entry. Returns true once every slot is
// assigned, false on dead end or timeout.
function solve(grid, slots, words, used, assigned, deadline) {
  if (Date.now() > deadline) return false;

  // MRV: pick the open slot with the fewest candidates. Any open slot with no
  // candidates at all means this branch is dead.
  let best = null;
  for (const slot of slots) {
    if (assigned.has(slot)) continue;
    const options = candidates(words, slot.len, patternFor(grid, slot), used);
    if (options.length === 0) return false;
    if (best === null || options.length < best.options.length) {
      best = { slot, options };
      // A forced slot cannot be beaten; stop scanning.
      if (options.length === 1) break;
    }
  }
  // Nothing left to assign: the grid is complete.
  if (best === null) return true;

  const { slot: target, options } = best;
  shuffle(options);
  for (let k = 0; k < options.length; k += 1) {
    const word = options[k];
    const previous = placeWord(grid, target, word);
    used.add(word);
    assigned.add(target);
    const solved = solve(grid, slots, words, used, assigned, deadline);
    if (solved) return true;
    // Undo in reverse order before trying the next word.
    assigned.delete(target);
    used.delete(word);
    unplace(grid, target, previous);
  }
  return false;
}
// Shuffles `arr` in place, walking from the last index down to 1 and swapping
// each element with one at a Math.random-chosen index at or below it. Returns
// the same array.
function shuffle(arr) {
  let i = arr.length - 1;
  while (i > 0) {
    const j = Math.floor(Math.random() * (i + 1));
    const held = arr[i];
    arr[i] = arr[j];
    arr[j] = held;
    i -= 1;
  }
  return arr;
}
// ── Puzzle assembly ─────────────────────────────────────────────────────────────
// Reads the answers back out of a solved grid. Cells are numbered in reading
// order (a cell that starts both an across and a down run shares one number).
// Returns { across, down }, each a list of { number, answer } in numbering order.
function answersFromGrid(grid) {
  const across = [];
  const down = [];

  // Concatenates the letters of the run starting at (r, c), stepping by (dr, dc).
  const readRun = (r, c, dr, dc) => {
    let text = '';
    for (let rr = r, cc = c; isCell(grid, rr, cc); rr += dr, cc += dc) {
      text += grid[rr][cc];
    }
    return text;
  };

  let counter = 0;
  for (let r = 0; r < grid.length; r += 1) {
    for (let c = 0; c < grid[0].length; c += 1) {
      if (!isCell(grid, r, c)) continue;
      const opensAcross = !isCell(grid, r, c - 1) && isCell(grid, r, c + 1);
      const opensDown = !isCell(grid, r - 1, c) && isCell(grid, r + 1, c);
      if (opensAcross || opensDown) {
        counter += 1;
        if (opensAcross) {
          across.push({ number: counter, answer: readRun(r, c, 0, 1) });
        }
        if (opensDown) {
          down.push({ number: counter, answer: readRun(r, c, 1, 0) });
        }
      }
    }
  }
  return { across, down };
}
// Tries to fill one template. `words` is the structure returned by loadWords and
// `timeoutMs` is the time budget handed to the solver. Returns the solved grid
// as an array of row strings, or null when the solver fails or the result does
// not pass the final check.
function generateOne(template, words, timeoutMs = 800) {
  const grid = buildGrid(template);
  const slots = deriveSlots(grid);
  const used = new Set();
  const assigned = new Set();
  const deadline = Date.now() + timeoutMs;
  if (!solve(grid, slots, words, used, assigned, deadline)) return null;

  // Safety net: every across/down answer must be a real common word and unique.
  const seen = new Set();
  for (const slot of slots) {
    const answer = wordAt(grid, slot);
    const isListed = Boolean(words.sets.get(slot.len)?.has(answer));
    if (!isListed || seen.has(answer)) return null;
    seen.add(answer);
  }
  return grid.map((cells) => cells.join(''));
}
// Command-line entry point.
//
// argv[2] (optional): number of grids to generate per tier; defaults to 20 when
//                     missing or not a non-zero integer.
// argv[3] (optional): output JSON path; defaults to
//                     ../data/crosswords/_generated.json relative to this script.
//
// For each tier it keeps generating grids (rotating through the tier's templates)
// until `perTier` distinct grids exist or the attempt budget of perTier * 80 is
// spent, then writes the whole bank as pretty-printed JSON. Set CW_DEBUG in the
// environment to log progress every fifth attempt.
function main() {
  const perTier = Number.parseInt(process.argv[2], 10) || 20;
  const outFile = process.argv[3] || path.join(__dirname, '../data/crosswords/_generated.json');
  // Create the destination directory up front so a missing folder cannot make the
  // final write fail after all the generation work has been done.
  fs.mkdirSync(path.dirname(outFile), { recursive: true });
  console.log('Loading ENABLE word list...');
  const words = loadWords();
  console.log(` ${[...words.byLen].map(([l, a]) => `${l}:${a.length}`).join(' ')}`);
  const bank = [];
  for (const tier of ['easy', 'medium', 'hard']) {
    const templates = TEMPLATES[tier];
    // Row-string keys of grids already accepted for this tier (dedupe).
    const seen = new Set();
    let made = 0;
    let attempts = 0;
    const maxAttempts = perTier * 80;
    const t0 = Date.now();
    while (made < perTier && attempts < maxAttempts) {
      attempts += 1;
      const template = templates[attempts % templates.length];
      const rows = generateOne(template, words);
      if (process.env.CW_DEBUG && attempts % 5 === 0) {
        console.log(` [${tier}] attempt ${attempts}, made ${made}, ${((Date.now() - t0) / 1000).toFixed(1)}s`);
      }
      if (!rows) continue;
      const key = rows.join('|');
      if (seen.has(key)) continue;
      seen.add(key);
      const { across, down } = answersFromGrid(buildGrid(rows));
      bank.push({
        id: `${tier}-${String(made + 1).padStart(3, '0')}`,
        difficulty: tier,
        grid: rows,
        // Reference answers (NOT consumed by the engine) to author clues against:
        _answersAcross: across.map((a) => `${a.number}. ${a.answer}`),
        _answersDown: down.map((a) => `${a.number}. ${a.answer}`),
        // One blank clue per answer, to be filled in by hand.
        across: across.map(() => ''),
        down: down.map(() => ''),
      });
      made += 1;
    }
    console.log(`${tier} (${TIER_SIZE[tier]}x${TIER_SIZE[tier]}): ${made}/${perTier} grids in ${attempts} attempts, ${((Date.now() - t0) / 1000).toFixed(1)}s`);
  }
  fs.writeFileSync(outFile, JSON.stringify(bank, null, 2));
  console.log(`\nWrote ${bank.length} candidate puzzles to ${outFile}`);
  console.log('Fill the empty across/down clue arrays; _answers* fields are references.');
}
// Script entry point: run the generator as soon as the module is loaded.
main();