feat: markov bigram nick generator for natural-looking identities
Replace uniform random chars with English bigram frequency table. Enforces max 2 consecutive consonants for pronounceability. Nicks, idents, and realnames now look like plausible human-chosen words.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -5,7 +5,6 @@ from __future__ import annotations
|
||||
import asyncio
|
||||
import logging
|
||||
import random
|
||||
import string
|
||||
from enum import Enum, auto
|
||||
from typing import Callable
|
||||
|
||||
@@ -28,30 +27,86 @@ class State(Enum):
|
||||
READY = auto()
|
||||
|
||||
|
||||
# English-like bigram transitions tuned for pronounceability.
# Each key is a letter; its value lists the letters allowed to follow it.
# Consonants -> vowel-heavy followers, vowels -> consonant-heavy followers.
# _markov_word draws with random.choice, so a follower that is repeated
# (e.g. "ee") is proportionally more likely to be picked.
_BIGRAMS: dict[str, str] = {
    "a": "ntrslcdmgbpkivwfyh",
    "b": "eeoaruiy",
    "c": "ooehaakri",
    "d": "eeioaury",
    "e": "rnsdltacmxpvwiy",
    "f": "iioeerau",
    "g": "eeohairu",
    "h": "eeaoitru",
    "i": "nstcldemorgav",
    "j": "ooueeai",
    "k": "eeinsay",
    "l": "eeioalduys",
    "m": "eeaoipuy",
    "n": "gdeetioauc",
    "o": "nfrumtlswpcdvbgk",
    "p": "eeroalih",
    "q": "u",
    "r": "eeioastundy",
    "s": "teehioaupl",
    "t": "hheiaorusy",
    "u": "rnstlcpmgdba",
    "v": "eeiiao",
    "w": "aaihone",
    "x": "iieea",
    "y": "seomtaiwn",
    "z": "eaaio",
}

# Letters treated as vowels; everything else (including "y") counts as a
# consonant for the consecutive-consonant limit.
_VOWELS = set("aeiou")

# Candidate first letters (common English word-initial letters).
# NOTE: every letter appears exactly twice, so random.choice picks uniformly
# among these 14 letters; vary the repeat counts to actually weight them.
_STARTERS = "ttssaaccmmppddhhiibbrrffggll"
|
||||
|
||||
|
||||
def _markov_word(min_len: int, max_len: int) -> str:
    """Generate a pronounceable lowercase word by walking the bigram table.

    A length is drawn uniformly from ``[min_len, max_len]``. The first letter
    comes from ``_STARTERS``; each later letter is drawn from the ``_BIGRAMS``
    followers of the previous one. After two consecutive non-vowels the next
    letter is forced to be a vowel, so a word never contains three consonants
    in a row (``y`` counts as a consonant because it is not in ``_VOWELS``).

    Args:
        min_len: Smallest word length to generate (inclusive).
        max_len: Largest word length to generate (inclusive).

    Returns:
        The generated word, or ``""`` when the drawn length is not positive.

    Raises:
        ValueError: If ``min_len > max_len`` (raised by ``random.randint``).
    """
    length = random.randint(min_len, max_len)
    if length <= 0:
        # Without this guard a requested length of 0 still yielded one letter.
        return ""

    ch = random.choice(_STARTERS)
    word = [ch]
    consonant_run = 0 if ch in _VOWELS else 1

    for _ in range(length - 1):
        # Letters missing from the table fall back to a plain vowel choice.
        followers = _BIGRAMS.get(ch, "aeiou")
        if consonant_run >= 2:
            # Two consonants in a row: force a vowel, preferring the vowels
            # the table allows after ``ch``.
            vowels = [c for c in followers if c in _VOWELS]
            ch = random.choice(vowels or "aeiou")
        else:
            ch = random.choice(followers)

        consonant_run = 0 if ch in _VOWELS else consonant_run + 1
        word.append(ch)

    return "".join(word)
|
||||
|
||||
|
||||
def _random_nick() -> str:
    """Generate a pronounceable nick that looks human-chosen.

    The base is a 5-8 letter word from ``_markov_word``. About 30% of the
    time a number from 0 to 99 is appended.

    Returns:
        The generated nick: lowercase letters, optionally followed by one or
        two digits.
    """
    base = _markov_word(5, 8)
    # Occasionally append a digit suffix (common IRC pattern)
    if random.random() < 0.3:
        base += str(random.randint(0, 99))
    return base
|
||||
|
||||
|
||||
def _random_user() -> str:
    """Generate a pronounceable ident.

    Returns:
        A 4-7 letter lowercase word produced by ``_markov_word``.
    """
    return _markov_word(4, 7)
|
||||
|
||||
|
||||
def _random_realname() -> str:
    """Generate a plausible first-name-like realname.

    Returns:
        A 4-7 letter word from ``_markov_word`` with its first letter
        upper-cased.
    """
    name = _markov_word(4, 7)
    # _markov_word(4, 7) always yields at least four letters, so name[0] is safe.
    return name[0].upper() + name[1:]
|
||||
|
||||
|
||||
class Network:
|
||||
|
||||
Reference in New Issue
Block a user