translations: add multi-lingual search term generation
- Static translations for 15 languages (ru, zh, es, pt, de, fr, ja, ko, ar, id, tr, vi, th, pl, uk)
- LibreTranslate API integration with configurable endpoint
- Dynamic language detection from the API's /languages endpoint
- Persistent JSON cache with 30-day TTL
- Categorized search terms: generic, protocol, anonymity, freshness, format, sources, geographic, use-case, search operators
- Dynamic year substitution for freshness terms
This commit is contained in:
674
translations.py
Normal file
674
translations.py
Normal file
@@ -0,0 +1,674 @@
|
||||
#!/usr/bin/env python2
|
||||
# -*- coding: utf-8 -*-
|
||||
"""Multi-lingual search term generation with LibreTranslate support."""
|
||||
|
||||
import random
|
||||
import os
|
||||
import json
|
||||
import time
|
||||
import datetime
|
||||
from misc import _log
|
||||
|
||||
# Year string appended to the year-specific freshness terms.  It is computed
# once, when the module is imported.
CURRENT_YEAR = str(datetime.datetime.now().year)

# LibreTranslate defaults.  set_config() may override the URL and the enabled
# flag; the timeout is passed to urllib2.urlopen() (seconds).
_libretranslate_url = 'https://lt.mymx.me/translate'
_libretranslate_enabled = True
_libretranslate_timeout = 10

# English base terms, one list per category.
BASE_TERMS_GENERIC = [
    'free proxy list', 'proxy server list',
    'public proxy list', 'open proxy list',
]

BASE_TERMS_PROTOCOL = [
    'socks5 proxy list', 'socks4 proxy list',
    'http proxy list', 'https proxy list',
]

BASE_TERMS_ANONYMITY = [
    'anonymous proxy', 'elite proxy',
    'high anonymity proxy', 'transparent proxy list',
]

BASE_TERMS_FRESHNESS = [
    'fresh proxy list', 'working proxy list', 'verified proxy list',
    'checked proxy list', 'live proxy list', 'proxy list today',
    'proxy list updated',
    'proxy list %s' % CURRENT_YEAR,
    'new proxy list %s' % CURRENT_YEAR,
]

BASE_TERMS_FORMAT = [
    'proxy list txt', 'proxy list ip port',
    'proxy list download', 'proxy txt file',
]

BASE_TERMS_SOURCES = [
    'proxy pastebin', 'proxy github',
    'proxy list telegram', 'free proxy api',
]

BASE_TERMS_GEOGRAPHIC = [
    'US proxy list', 'USA proxy', 'Europe proxy list',
    'Asia proxy list', 'Russia proxy list', 'China proxy list',
]

BASE_TERMS_USECASE = [
    'proxy for scraping', 'fast proxy list',
    'residential proxy list', 'datacenter proxy list',
]

BASE_TERMS_SEARCH_OPS = [
    'filetype:txt proxy list', 'inurl:proxy.txt',
    'inurl:proxies.txt', 'intitle:proxy list',
]

# Every category flattened into one list, used for random selection.
BASE_TERMS = sum([
    BASE_TERMS_GENERIC,
    BASE_TERMS_PROTOCOL,
    BASE_TERMS_ANONYMITY,
    BASE_TERMS_FRESHNESS,
    BASE_TERMS_FORMAT,
    BASE_TERMS_SOURCES,
    BASE_TERMS_GEOGRAPHIC,
    BASE_TERMS_USECASE,
    BASE_TERMS_SEARCH_OPS,
], [])

# Subset that is worth translating: search operators and the more technical
# categories are left out.
TRANSLATABLE_TERMS = sum([
    BASE_TERMS_GENERIC,
    BASE_TERMS_ANONYMITY,
    BASE_TERMS_FRESHNESS,
], [])
|
||||
|
||||
# Static translations - no API needed
# Format: {lang_code: {english_term: translated_term}}
#
# Each language table maps an English phrase (plain str key) to its
# translation (unicode value).  Lookups elsewhere in this module are exact
# matches on the English key.
#
# NOTE(review): the keys 'socks5 proxy', 'socks4 proxy', 'http proxy',
# 'fresh proxy' and 'working proxy' do not appear in the BASE_TERMS_* lists
# (those use the '... proxy list' forms), so a lookup by base term will not
# hit them - confirm whether that is intended.
STATIC_TRANSLATIONS = {
    'ru': {
        'free proxy list': u'бесплатный список прокси',
        'socks5 proxy': u'socks5 прокси',
        'socks4 proxy': u'socks4 прокси',
        'http proxy': u'http прокси',
        'proxy server list': u'список прокси серверов',
        'anonymous proxy': u'анонимный прокси',
        'elite proxy': u'элитный прокси',
        'fresh proxy': u'свежие прокси',
        'working proxy': u'рабочие прокси',
        'proxy list updated': u'обновленный список прокси',
    },
    'zh': {
        'free proxy list': u'免费代理列表',
        'socks5 proxy': u'socks5代理',
        'socks4 proxy': u'socks4代理',
        'http proxy': u'http代理',
        'proxy server list': u'代理服务器列表',
        'anonymous proxy': u'匿名代理',
        'elite proxy': u'高匿代理',
        'fresh proxy': u'最新代理',
        'working proxy': u'可用代理',
        'proxy list updated': u'代理列表更新',
    },
    'es': {
        'free proxy list': u'lista de proxies gratis',
        'socks5 proxy': u'proxy socks5',
        'socks4 proxy': u'proxy socks4',
        'http proxy': u'proxy http',
        'proxy server list': u'lista de servidores proxy',
        'anonymous proxy': u'proxy anónimo',
        'elite proxy': u'proxy elite',
        'fresh proxy': u'proxies frescos',
        'working proxy': u'proxies funcionando',
        'proxy list updated': u'lista de proxies actualizada',
    },
    'pt': {
        'free proxy list': u'lista de proxy grátis',
        'socks5 proxy': u'proxy socks5',
        'socks4 proxy': u'proxy socks4',
        'http proxy': u'proxy http',
        'proxy server list': u'lista de servidores proxy',
        'anonymous proxy': u'proxy anônimo',
        'elite proxy': u'proxy elite',
        'fresh proxy': u'proxies novos',
        'working proxy': u'proxies funcionando',
        'proxy list updated': u'lista de proxy atualizada',
    },
    'de': {
        'free proxy list': u'kostenlose Proxy-Liste',
        'socks5 proxy': u'socks5 Proxy',
        'socks4 proxy': u'socks4 Proxy',
        'http proxy': u'http Proxy',
        'proxy server list': u'Proxy-Server-Liste',
        'anonymous proxy': u'anonymer Proxy',
        'elite proxy': u'Elite-Proxy',
        'fresh proxy': u'frische Proxys',
        'working proxy': u'funktionierende Proxys',
        'proxy list updated': u'aktualisierte Proxy-Liste',
    },
    'fr': {
        'free proxy list': u'liste de proxy gratuit',
        'socks5 proxy': u'proxy socks5',
        'socks4 proxy': u'proxy socks4',
        'http proxy': u'proxy http',
        'proxy server list': u'liste de serveurs proxy',
        'anonymous proxy': u'proxy anonyme',
        'elite proxy': u'proxy élite',
        'fresh proxy': u'proxies frais',
        'working proxy': u'proxies fonctionnels',
        'proxy list updated': u'liste de proxy mise à jour',
    },
    'ja': {
        'free proxy list': u'無料プロキシリスト',
        'socks5 proxy': u'socks5プロキシ',
        'socks4 proxy': u'socks4プロキシ',
        'http proxy': u'httpプロキシ',
        'proxy server list': u'プロキシサーバーリスト',
        'anonymous proxy': u'匿名プロキシ',
        'elite proxy': u'エリートプロキシ',
        'fresh proxy': u'最新プロキシ',
        'working proxy': u'動作するプロキシ',
        'proxy list updated': u'プロキシリスト更新',
    },
    'ko': {
        'free proxy list': u'무료 프록시 목록',
        'socks5 proxy': u'socks5 프록시',
        'socks4 proxy': u'socks4 프록시',
        'http proxy': u'http 프록시',
        'proxy server list': u'프록시 서버 목록',
        'anonymous proxy': u'익명 프록시',
        'elite proxy': u'엘리트 프록시',
        'fresh proxy': u'최신 프록시',
        'working proxy': u'작동하는 프록시',
        'proxy list updated': u'프록시 목록 업데이트',
    },
    'ar': {
        'free proxy list': u'قائمة بروكسي مجانية',
        'socks5 proxy': u'بروكسي socks5',
        'socks4 proxy': u'بروكسي socks4',
        'http proxy': u'بروكسي http',
        'proxy server list': u'قائمة خوادم البروكسي',
        'anonymous proxy': u'بروكسي مجهول',
        'elite proxy': u'بروكسي نخبة',
        'fresh proxy': u'بروكسي جديد',
        'working proxy': u'بروكسي يعمل',
        'proxy list updated': u'قائمة بروكسي محدثة',
    },
    'id': {
        'free proxy list': u'daftar proxy gratis',
        'socks5 proxy': u'proxy socks5',
        'socks4 proxy': u'proxy socks4',
        'http proxy': u'proxy http',
        'proxy server list': u'daftar server proxy',
        'anonymous proxy': u'proxy anonim',
        'elite proxy': u'proxy elite',
        'fresh proxy': u'proxy baru',
        'working proxy': u'proxy aktif',
        'proxy list updated': u'daftar proxy diperbarui',
    },
    'tr': {
        'free proxy list': u'ücretsiz proxy listesi',
        'socks5 proxy': u'socks5 proxy',
        'socks4 proxy': u'socks4 proxy',
        'http proxy': u'http proxy',
        'proxy server list': u'proxy sunucu listesi',
        'anonymous proxy': u'anonim proxy',
        'elite proxy': u'elit proxy',
        'fresh proxy': u'güncel proxy',
        'working proxy': u'çalışan proxy',
        'proxy list updated': u'güncellenmiş proxy listesi',
    },
    'vi': {
        'free proxy list': u'danh sách proxy miễn phí',
        'socks5 proxy': u'proxy socks5',
        'socks4 proxy': u'proxy socks4',
        'http proxy': u'proxy http',
        'proxy server list': u'danh sách máy chủ proxy',
        'anonymous proxy': u'proxy ẩn danh',
        'elite proxy': u'proxy cao cấp',
        'fresh proxy': u'proxy mới',
        'working proxy': u'proxy hoạt động',
        'proxy list updated': u'danh sách proxy cập nhật',
    },
    'th': {
        'free proxy list': u'รายการพร็อกซี่ฟรี',
        'socks5 proxy': u'พร็อกซี่ socks5',
        'socks4 proxy': u'พร็อกซี่ socks4',
        'http proxy': u'พร็อกซี่ http',
        'proxy server list': u'รายการเซิร์ฟเวอร์พร็อกซี่',
        'anonymous proxy': u'พร็อกซี่นิรนาม',
        'elite proxy': u'พร็อกซี่ระดับสูง',
        'fresh proxy': u'พร็อกซี่ใหม่',
        'working proxy': u'พร็อกซี่ใช้งานได้',
        'proxy list updated': u'รายการพร็อกซี่อัพเดท',
    },
    'pl': {
        'free proxy list': u'darmowa lista proxy',
        'socks5 proxy': u'proxy socks5',
        'socks4 proxy': u'proxy socks4',
        'http proxy': u'proxy http',
        'proxy server list': u'lista serwerów proxy',
        'anonymous proxy': u'anonimowe proxy',
        'elite proxy': u'elitarne proxy',
        'fresh proxy': u'świeże proxy',
        'working proxy': u'działające proxy',
        'proxy list updated': u'zaktualizowana lista proxy',
    },
    'uk': {
        'free proxy list': u'безкоштовний список проксі',
        'socks5 proxy': u'socks5 проксі',
        'socks4 proxy': u'socks4 проксі',
        'http proxy': u'http проксі',
        'proxy server list': u'список проксі серверів',
        'anonymous proxy': u'анонімний проксі',
        'elite proxy': u'елітний проксі',
        'fresh proxy': u'свіжі проксі',
        'working proxy': u'робочі проксі',
        'proxy list updated': u'оновлений список проксі',
    },
}
|
||||
|
||||
# Language codes that have a static table, plus English.
LANGUAGES = list(STATIC_TRANSLATIONS) + ['en']

# Target-language codes reported by the LibreTranslate server.  Filled in by
# _fetch_available_languages(); the flag records that a fetch was attempted.
_libretranslate_langs = set()
_libretranslate_langs_checked = False

# In-memory cache of online translations and where it is persisted.
_translation_cache = dict()
_cache_file = 'translation_cache.json'
_cache_max_age = 30 * 86400  # 30 days, in seconds

# cache_key -> time.time() of the last failed attempt, so a failing
# translation is not re-requested on every call.
_failed_translations = dict()
_failed_cache_ttl = 3600  # seconds (1 hour) before a failed translation is retried
|
||||
|
||||
|
||||
def set_config(config):
    """Configure translation settings from a config object.

    Reads the optional ``scraper.libretranslate_url``,
    ``scraper.libretranslate_enabled`` and ``scraper.libretranslate_timeout``
    attributes; any attribute that is missing leaves the current module
    value untouched.  When LibreTranslate ends up enabled, the list of
    available target languages is fetched and a summary is logged.

    Args:
        config: Config object, optionally exposing a ``scraper`` attribute
            that carries the settings listed above.
    """
    global _libretranslate_url, _libretranslate_enabled
    global _libretranslate_timeout
    global _libretranslate_langs, _libretranslate_langs_checked

    previous_url = _libretranslate_url

    if hasattr(config, 'scraper'):
        scraper = config.scraper
        if hasattr(scraper, 'libretranslate_url'):
            _libretranslate_url = scraper.libretranslate_url
        if hasattr(scraper, 'libretranslate_enabled'):
            _libretranslate_enabled = scraper.libretranslate_enabled
        # Optional override of the request timeout; a None value is ignored
        # so the request never silently becomes blocking-without-timeout.
        timeout = getattr(scraper, 'libretranslate_timeout', None)
        if timeout is not None:
            _libretranslate_timeout = timeout

    if _libretranslate_url != previous_url:
        # Languages learned from the previous endpoint say nothing about the
        # new one, so force _fetch_available_languages() to query again.
        _libretranslate_langs = set()
        _libretranslate_langs_checked = False

    if _libretranslate_enabled:
        _fetch_available_languages()
        _log('LibreTranslate: enabled (%s) - %d languages' % (
            _libretranslate_url, len(_libretranslate_langs)
        ), 'info')
    else:
        _log('LibreTranslate: disabled', 'debug')
|
||||
|
||||
|
||||
def _normalize_lang_codes(codes):
    """Return *codes* as a set with every ``zh*`` variant folded into 'zh'.

    Empty/None codes and English ('en') are dropped.
    """
    normalized = set()
    for code in codes:
        if not code:
            continue
        normalized.add('zh' if code.startswith('zh') else code)
    normalized.discard('en')
    return normalized


def _fetch_available_languages():
    """Fetch available languages from the LibreTranslate API.

    Queries the ``/languages`` endpoint (derived from the configured
    translate URL) and stores in ``_libretranslate_langs`` the languages
    that can be translated from English.  If the English entry lists no
    targets, every language code reported by the server is used instead.

    The request is attempted at most once until the checked flag is reset;
    on any failure the language set is left empty and a warning is logged.
    """
    global _libretranslate_langs, _libretranslate_langs_checked

    if _libretranslate_langs_checked:
        return

    # Set before the request so that a failing endpoint is not queried again
    # on every subsequent call.
    _libretranslate_langs_checked = True

    # Derive base URL from the translate endpoint.  Trailing slashes are
    # stripped first so '.../translate/' resolves like '.../translate'.
    base_url = _libretranslate_url.rstrip('/').rsplit('/', 1)[0]
    languages_url = base_url + '/languages'

    try:
        import urllib2
        req = urllib2.Request(languages_url)
        req.add_header('Accept', 'application/json')
        req.add_header('User-Agent', 'PPF/1.0')
        resp = urllib2.urlopen(req, timeout=_libretranslate_timeout)
        try:
            langs = json.loads(resp.read())
        finally:
            # Release the connection even if the body is not valid JSON
            resp.close()

        # Find the English entry to get the available target languages
        en_targets = []
        for lang in langs:
            if lang.get('code') == 'en':
                en_targets = lang.get('targets') or []
                break

        if en_targets:
            _libretranslate_langs = _normalize_lang_codes(en_targets)
        else:
            # Fallback: every language code the server reports
            _libretranslate_langs = _normalize_lang_codes(
                lang.get('code', '') for lang in langs
            )

        _log('LibreTranslate languages: %s' % ', '.join(sorted(_libretranslate_langs)), 'debug')

    except Exception as e:
        _log('failed to fetch LibreTranslate languages: %s' % str(e), 'warn')
        _libretranslate_langs = set()
|
||||
|
||||
|
||||
def _load_cache():
    """Load the translation cache from ``_cache_file`` into memory.

    Two on-disk layouts are accepted: the current one, an object with
    ``_meta`` and ``translations`` keys, and the older one where the whole
    document is the translations mapping.  A missing file leaves the cache
    untouched; an unreadable, unparsable or wrongly-shaped file resets the
    cache to an empty dict and logs a debug message.
    """
    global _translation_cache

    if not os.path.exists(_cache_file):
        return

    try:
        with open(_cache_file, 'r') as f:
            data = json.load(f)
    except (IOError, OSError, ValueError) as e:
        _log('cache load failed: %s' % str(e), 'debug')
        _translation_cache = {}
        return

    # Handle both old format (just translations) and new format (with metadata)
    if isinstance(data, dict) and '_meta' in data:
        data = data.get('translations', {})

    # The cache is indexed and assigned to by key, so anything other than a
    # mapping (e.g. a JSON list or null) would break later lookups.
    if not isinstance(data, dict):
        _log('cache load failed: unexpected cache format', 'debug')
        data = {}

    _translation_cache = data
|
||||
|
||||
|
||||
def _save_cache():
    """Save the translation cache to ``_cache_file`` as UTF-8 JSON.

    The document has a ``_meta`` section (format version, update time,
    entry count) and a ``translations`` section holding the cache itself.
    Saving is best effort: any I/O or serialization error is logged at
    debug level and otherwise ignored.
    """
    import io

    data = {
        '_meta': {
            'version': 1,
            'updated': int(time.time()),
            'count': len(_translation_cache)
        },
        'translations': _translation_cache
    }
    try:
        # With ensure_ascii=False, Python 2's json may hand back unicode;
        # writing that to a byte-mode file raises UnicodeEncodeError on
        # non-ASCII text.  Serialize first, then write through an explicit
        # UTF-8 text stream.
        text = json.dumps(data, ensure_ascii=False, indent=2)
        if isinstance(text, bytes):
            text = text.decode('utf-8')
        with io.open(_cache_file, 'w', encoding='utf-8') as f:
            f.write(text)
    except (IOError, OSError, ValueError, TypeError) as e:
        # ValueError also covers UnicodeError raised while (de)coding
        _log('cache save failed: %s' % str(e), 'debug')
|
||||
|
||||
|
||||
def translate_libretranslate(text, target_lang, source_lang='en', api_url=None):
    """Translate text using the LibreTranslate API.

    A fresh cached translation is returned without touching the network,
    even when the server's language list is unavailable.  Otherwise the
    target language must be one the server reported, and a translation that
    failed within the last ``_failed_cache_ttl`` seconds is not retried.

    Args:
        text: Text to translate
        target_lang: Target language code (e.g., 'ru', 'zh')
        source_lang: Source language code (default: 'en')
        api_url: LibreTranslate API URL (uses configured default if None)

    Returns:
        Translated text or None on failure
    """
    if not _libretranslate_enabled:
        return None

    if api_url is None:
        api_url = _libretranslate_url

    cache_key = '%s:%s:%s' % (source_lang, target_lang, text)

    # Check cache first, before anything that needs the API to be reachable
    cached = _translation_cache.get(cache_key)
    if isinstance(cached, dict):
        # New format: dict with timestamp; expired entries fall through
        if time.time() - cached.get('time', 0) < _cache_max_age:
            return cached.get('text')
    elif cached is not None:
        # Old format: plain string without expiry information
        return cached

    # Check if target language is available (fetch if not checked yet)
    if not _libretranslate_langs_checked:
        _fetch_available_languages()

    if target_lang not in _libretranslate_langs:
        return None

    # Check if we recently failed this translation
    if cache_key in _failed_translations:
        if time.time() - _failed_translations[cache_key] < _failed_cache_ttl:
            return None

    # Map language codes (e.g., zh -> zh-Hans for the API)
    api_target = target_lang
    if target_lang == 'zh':
        api_target = 'zh-Hans'

    try:
        import urllib2
        data = json.dumps({
            'q': text,
            'source': source_lang,
            'target': api_target,
            'format': 'text',
        })
        req = urllib2.Request(api_url, data)
        req.add_header('Content-Type', 'application/json')
        req.add_header('User-Agent', 'PPF/1.0')
        resp = urllib2.urlopen(req, timeout=_libretranslate_timeout)
        try:
            result = json.loads(resp.read())
        finally:
            resp.close()
    except Exception as e:
        _failed_translations[cache_key] = time.time()
        _log('translation failed [%s->%s]: %s' % (source_lang, target_lang, str(e)), 'debug')
        return None

    if not isinstance(result, dict) or 'translatedText' not in result:
        # A well-formed response without a translation is a failure too;
        # remember it so the API is not hit again on every call.
        _failed_translations[cache_key] = time.time()
        _log('translation failed [%s->%s]: no translatedText in response' % (
            source_lang, target_lang), 'debug')
        return None

    translated = result['translatedText']

    # Store with timestamp for cache expiry
    _translation_cache[cache_key] = {
        'text': translated,
        'time': int(time.time())
    }
    _failed_translations.pop(cache_key, None)

    # Persisting and logging are bookkeeping only: a problem here must not
    # turn a successful translation into a reported failure.
    try:
        _save_cache()
        _log('translated [%s]: %s -> %s' % (target_lang, text, translated), 'debug')
    except Exception as e:
        _log('translation bookkeeping failed [%s]: %s' % (target_lang, str(e)), 'debug')

    return translated
|
||||
|
||||
|
||||
def get_cache_stats():
    """Summarise the translation cache and the API configuration.

    Returns:
        dict with the number of cached entries, the number of recorded
        failed translations, the cache file name, whether LibreTranslate is
        enabled, its URL, and how many API languages are known.
    """
    stats = {}
    stats['entries'] = len(_translation_cache)
    stats['failed_pending'] = len(_failed_translations)
    stats['file'] = _cache_file
    stats['enabled'] = _libretranslate_enabled
    stats['url'] = _libretranslate_url
    stats['api_languages'] = len(_libretranslate_langs)
    return stats
|
||||
|
||||
|
||||
def get_translated_term(term=None, lang=None, use_api=True):
    """Get a search term, optionally translated.

    Static translations are tried first, then (if allowed and enabled) the
    LibreTranslate API.  When no translation is available the result falls
    back to English: the caller's own term if one was given, otherwise a
    random term from the full English list.

    Args:
        term: Specific term to translate (or random if None)
        lang: Target language (or random if None)
        use_api: Whether to use LibreTranslate API for missing translations

    Returns:
        (term, lang) tuple; lang is 'en' when the fallback was used
    """
    # Expand language list to include LibreTranslate-only languages
    all_langs = list(set(LANGUAGES) | _libretranslate_langs)

    if lang is None:
        lang = random.choice(all_langs) if all_langs else 'en'

    term_given = term is not None

    if lang == 'en':
        # For English, use full term list
        if not term_given:
            term = random.choice(BASE_TERMS)
        return term, lang

    # For other languages, pick from translatable terms
    if not term_given:
        term = random.choice(TRANSLATABLE_TERMS)

    # Try static translations first
    static_table = STATIC_TRANSLATIONS.get(lang)
    if static_table is not None and term in static_table:
        return static_table[term], lang

    # Try LibreTranslate API for missing translations.  Language support is
    # checked inside translate_libretranslate(), which also fetches the
    # server's language list on first use; pre-checking it here would keep
    # the API unreachable until set_config() had been called.
    if use_api and _libretranslate_enabled:
        translated = translate_libretranslate(term, lang)
        if translated:
            return translated, lang

    # Fall back to English.  A term the caller asked for explicitly is kept
    # rather than being swapped for an unrelated random one.
    if term_given:
        return term, 'en'
    return random.choice(BASE_TERMS), 'en'
|
||||
|
||||
|
||||
def get_random_search_term():
    """Pick a random search term in a random language.

    Returns:
        The search term string only; the language it ended up in is
        discarded.
    """
    return get_translated_term()[0]
|
||||
|
||||
|
||||
def get_all_terms_for_language(lang):
    """Return every search term known for one language.

    Args:
        lang: Language code

    Returns:
        List of the statically translated terms for ``lang``; a copy of the
        English base terms for 'en' or for a language without a static table
    """
    static_table = None if lang == 'en' else STATIC_TRANSLATIONS.get(lang)
    if static_table is None:
        return list(BASE_TERMS)
    return list(static_table.values())
|
||||
|
||||
|
||||
def get_mixed_terms(count=5, english_weight=0.3):
    """Build a list of search terms drawn from several languages.

    Args:
        count: Number of terms to return
        english_weight: Probability that a given slot is filled with an
            English base term instead of a random-language term

    Returns:
        List of ``count`` search terms in various languages
    """
    return [
        random.choice(BASE_TERMS)
        if random.random() < english_weight
        else get_random_search_term()
        for _ in range(count)
    ]
|
||||
|
||||
|
||||
# Load cache on module import
_load_cache()


if __name__ == '__main__':
    # Manual smoke test: prints configuration, cache statistics and sample
    # terms.  Pass --test-api to also exercise the LibreTranslate API.
    import sys

    def _printable(value):
        """Return *value* UTF-8 encoded when it is unicode, else unchanged."""
        if isinstance(value, unicode):
            return value.encode('utf-8')
        return value

    # Fetch available languages from API
    _fetch_available_languages()

    # Test output
    print('LibreTranslate: %s' % ('enabled' if _libretranslate_enabled else 'disabled'))
    print('API URL: %s' % _libretranslate_url)
    print('Static languages: %s' % ', '.join(sorted(STATIC_TRANSLATIONS.keys())))
    api_only = _libretranslate_langs - set(STATIC_TRANSLATIONS.keys())
    print('API-only languages: %s' % (', '.join(sorted(api_only)) if api_only else 'none'))
    print('')

    # Cache stats
    stats = get_cache_stats()
    print('Cache: %d entries in %s' % (stats['entries'], stats['file']))
    print('')

    print('Sample static translations:')
    for lang in sorted(STATIC_TRANSLATIONS.keys())[:5]:
        term, _ = get_translated_term('free proxy list', lang, use_api=False)
        print(' [%s] %s' % (lang, _printable(term)))

    print('')

    # Test LibreTranslate if --test-api flag
    if '--test-api' in sys.argv:
        print('Testing LibreTranslate API...')
        # Use languages that are API-available but not in static translations
        test_langs = list(api_only)[:5] if api_only else ['fr', 'ar']
        for lang in test_langs:
            term, result_lang = get_translated_term('free proxy list', lang, use_api=True)
            print(' [%s] %s' % (result_lang, _printable(term)))
        print('')
        stats = get_cache_stats()
        print('Cache after API test: %d entries' % stats['entries'])
    else:
        print('Run with --test-api to test LibreTranslate API')

    print('')
    print('Random mixed terms:')
    for term in get_mixed_terms(10, english_weight=0.2):
        # Single formatted argument: under Python 2 (no print_function
        # import) print(' ', x) would print the repr of a tuple.
        print('  %s' % _printable(term))
|
||||
Reference in New Issue
Block a user