feat: markov bigram nick generator for natural-looking identities

Replace uniform random chars with English bigram frequency table.
Enforces max 2 consecutive consonants for pronounceability. Nicks,
idents, and realnames now look like plausible human-chosen words.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
user
2026-02-19 18:08:57 +01:00
parent 86832b8fe5
commit 845496f1b3

View File

@@ -5,7 +5,6 @@ from __future__ import annotations
import asyncio
import logging
import random
import string
from enum import Enum, auto
from typing import Callable
@@ -28,30 +27,86 @@ class State(Enum):
READY = auto()
# Follower table for the word generator: maps a lowercase letter to the letters
# allowed to come after it. Followers are drawn with random.choice, so a letter
# listed twice in a string is twice as likely to be picked. Consonant entries
# lean towards vowels and vowel entries lean towards consonants, which keeps
# the generated words pronounceable.
_BIGRAMS: dict[str, str] = {
    "a": "ntrslcdmgbpkivwfyh",
    "b": "eeoaruiy",
    "c": "ooehaakri",
    "d": "eeioaury",
    "e": "rnsdltacmxpvwiy",
    "f": "iioeerau",
    "g": "eeohairu",
    "h": "eeaoitru",
    "i": "nstcldemorgav",
    "j": "ooueeai",
    "k": "eeinsay",
    "l": "eeioalduys",
    "m": "eeaoipuy",
    "n": "gdeetioauc",
    "o": "nfrumtlswpcdvbgk",
    "p": "eeroalih",
    "q": "u",
    "r": "eeioastundy",
    "s": "teehioaupl",
    "t": "hheiaorusy",
    "u": "rnstlcpmgdba",
    "v": "eeiiao",
    "w": "aaihone",
    "x": "iieea",
    "y": "seomtaiwn",
    "z": "eaaio",
}

# Letters counted as vowels; anything else (including "y") is a consonant.
_VOWELS: set[str] = {"a", "e", "i", "o", "u"}

# Pool of word-initial letters. Repetition is the weighting mechanism, but every
# letter currently appears exactly twice, so the draw is uniform over these 14.
_STARTERS: str = "ttssaaccmmppddhhiibbrrffggll"
def _markov_word(min_len: int, max_len: int) -> str:
    """Generate a pronounceable lowercase word by walking the bigram table.

    The length is drawn uniformly from ``[min_len, max_len]`` (inclusive). The
    first letter is picked from ``_STARTERS``; each later letter is picked from
    the ``_BIGRAMS`` followers of the letter before it. After two consecutive
    non-vowels the next letter is forced to be a vowel.

    Args:
        min_len: Smallest length that may be produced.
        max_len: Largest length that may be produced.

    Returns:
        The generated word, or an empty string when the drawn length is zero
        or negative.

    Raises:
        ValueError: Propagated from ``random.randint`` when ``min_len`` is
            greater than ``max_len``.
    """
    length = random.randint(min_len, max_len)
    if length <= 0:
        # Without this guard the starter letter below would always be emitted,
        # producing a one-letter word that exceeds the requested maximum.
        return ""

    ch = random.choice(_STARTERS)
    word = [ch]
    consonant_run = 0 if ch in _VOWELS else 1
    for _ in range(length - 1):
        followers = _BIGRAMS.get(ch, "aeiou")
        if consonant_run >= 2:
            # Two non-vowels in a row: restrict the draw to vowel followers,
            # falling back to any vowel if this letter lists none.
            vowels = [c for c in followers if c in _VOWELS]
            ch = random.choice(vowels) if vowels else random.choice("aeiou")
        else:
            ch = random.choice(followers)
        consonant_run = 0 if ch in _VOWELS else consonant_run + 1
        word.append(ch)
    return "".join(word)
def _random_nick() -> str:
    """Generate a pronounceable nick that looks human-chosen.

    Returns:
        A word from ``_markov_word(5, 8)``; about 30% of the time a number
        from 0 to 99 is appended to it.
    """
    # The earlier uniform-random implementation (random letters and digits
    # via the ``string`` module) is dropped: it returned before the Markov
    # code could run and relied on an import this change removes.
    base = _markov_word(5, 8)
    # Occasionally append a digit suffix (common IRC pattern)
    if random.random() < 0.3:
        base += str(random.randint(0, 99))
    return base
def _random_user() -> str:
    """Generate a pronounceable ident.

    Returns:
        A word from ``_markov_word(4, 7)``.
    """
    # The earlier uniform-random body is dropped: it returned first, leaving
    # this call unreachable, and depended on the ``string`` module.
    return _markov_word(4, 7)
def _random_realname() -> str:
    """Generate a plausible first-name-like realname.

    Returns:
        A word from ``_markov_word(4, 7)`` with its first letter upper-cased.
    """
    # The earlier uniform-random body is dropped: it returned first, leaving
    # the Markov-based code unreachable, and depended on the ``string`` module.
    name = _markov_word(4, 7)
    return name[0].upper() + name[1:]
class Network: