- Static translations for 15 languages (ru, zh, es, pt, de, fr, ja, ko, ar, id, tr, vi, th, pl, uk)
- LibreTranslate API integration with a configurable endpoint
- Dynamic language detection from the API's /languages endpoint
- Persistent JSON cache with a 30-day TTL
- Categorized search terms: generic, protocol, anonymity, freshness, format, sources, geographic, use-case, search operators
- Dynamic year substitution for freshness terms
675 lines
22 KiB
Python
675 lines
22 KiB
Python
#!/usr/bin/env python2
|
||
# -*- coding: utf-8 -*-
|
||
"""Multi-lingual search term generation with LibreTranslate support."""
|
||
|
||
import random
|
||
import os
|
||
import json
|
||
import time
|
||
import datetime
|
||
from misc import _log
|
||
|
||
# Year appended to some "freshness" search terms; evaluated once, when the
# module is imported.
CURRENT_YEAR = str(datetime.date.today().year)

# LibreTranslate settings. set_config() can override the URL and the enabled
# flag; the timeout (seconds, handed to urllib2.urlopen) is only defined here.
_libretranslate_timeout = 10
_libretranslate_enabled = True
_libretranslate_url = 'https://lt.mymx.me/translate'
|
||
|
||
# English base terms, one list per category. The lists are concatenated
# into BASE_TERMS / TRANSLATABLE_TERMS further down, so they must stay lists.

# Generic "proxy list" phrasings.
BASE_TERMS_GENERIC = [
    'free proxy list',
    'proxy server list',
    'public proxy list',
    'open proxy list',
]

# Protocol-specific phrasings.
BASE_TERMS_PROTOCOL = [
    'socks5 proxy list',
    'socks4 proxy list',
    'http proxy list',
    'https proxy list',
]

# Anonymity-level phrasings.
BASE_TERMS_ANONYMITY = [
    'anonymous proxy',
    'elite proxy',
    'high anonymity proxy',
    'transparent proxy list',
]

# Freshness phrasings; the last two embed the year computed at import time.
BASE_TERMS_FRESHNESS = [
    'fresh proxy list',
    'working proxy list',
    'verified proxy list',
    'checked proxy list',
    'live proxy list',
    'proxy list today',
    'proxy list updated',
    'proxy list %s' % CURRENT_YEAR,
    'new proxy list %s' % CURRENT_YEAR,
]

# File-format oriented phrasings.
BASE_TERMS_FORMAT = [
    'proxy list txt',
    'proxy list ip port',
    'proxy list download',
    'proxy txt file',
]

# Phrasings that name a hosting source.
BASE_TERMS_SOURCES = [
    'proxy pastebin',
    'proxy github',
    'proxy list telegram',
    'free proxy api',
]

# Region-specific phrasings.
BASE_TERMS_GEOGRAPHIC = [
    'US proxy list',
    'USA proxy',
    'Europe proxy list',
    'Asia proxy list',
    'Russia proxy list',
    'China proxy list',
]

# Use-case phrasings.
BASE_TERMS_USECASE = [
    'proxy for scraping',
    'fast proxy list',
    'residential proxy list',
    'datacenter proxy list',
]

# Queries that use search-engine operators (filetype:, inurl:, intitle:).
BASE_TERMS_SEARCH_OPS = [
    'filetype:txt proxy list',
    'inurl:proxy.txt',
    'inurl:proxies.txt',
    'intitle:proxy list',
]
|
||
|
||
# Every term from every category: the English pool used for random selection.
BASE_TERMS = list(
    BASE_TERMS_GENERIC
    + BASE_TERMS_PROTOCOL
    + BASE_TERMS_ANONYMITY
    + BASE_TERMS_FRESHNESS
    + BASE_TERMS_FORMAT
    + BASE_TERMS_SOURCES
    + BASE_TERMS_GEOGRAPHIC
    + BASE_TERMS_USECASE
    + BASE_TERMS_SEARCH_OPS
)

# Pool used when a random term is needed for a non-English language: only
# the generic, anonymity and freshness categories (search operators and the
# more technical categories are left out).
TRANSLATABLE_TERMS = list(
    BASE_TERMS_GENERIC
    + BASE_TERMS_ANONYMITY
    + BASE_TERMS_FRESHNESS
)
|
||
|
||
# Built-in translations that need no API call.
# Layout: {language_code: {english_term: translated_term}}; every language
# carries the same ten English keys and all values are unicode strings.
#
# NOTE(review): the keys 'socks5 proxy', 'socks4 proxy', 'http proxy',
# 'fresh proxy' and 'working proxy' have no exact counterpart in the
# BASE_TERMS_* lists (those use '... proxy list'), so they only match when a
# caller passes exactly these strings - confirm whether that is intended.
STATIC_TRANSLATIONS = {
    # Russian
    'ru': {
        'free proxy list': u'бесплатный список прокси',
        'socks5 proxy': u'socks5 прокси',
        'socks4 proxy': u'socks4 прокси',
        'http proxy': u'http прокси',
        'proxy server list': u'список прокси серверов',
        'anonymous proxy': u'анонимный прокси',
        'elite proxy': u'элитный прокси',
        'fresh proxy': u'свежие прокси',
        'working proxy': u'рабочие прокси',
        'proxy list updated': u'обновленный список прокси',
    },
    # Chinese
    'zh': {
        'free proxy list': u'免费代理列表',
        'socks5 proxy': u'socks5代理',
        'socks4 proxy': u'socks4代理',
        'http proxy': u'http代理',
        'proxy server list': u'代理服务器列表',
        'anonymous proxy': u'匿名代理',
        'elite proxy': u'高匿代理',
        'fresh proxy': u'最新代理',
        'working proxy': u'可用代理',
        'proxy list updated': u'代理列表更新',
    },
    # Spanish
    'es': {
        'free proxy list': u'lista de proxies gratis',
        'socks5 proxy': u'proxy socks5',
        'socks4 proxy': u'proxy socks4',
        'http proxy': u'proxy http',
        'proxy server list': u'lista de servidores proxy',
        'anonymous proxy': u'proxy anónimo',
        'elite proxy': u'proxy elite',
        'fresh proxy': u'proxies frescos',
        'working proxy': u'proxies funcionando',
        'proxy list updated': u'lista de proxies actualizada',
    },
    # Portuguese
    'pt': {
        'free proxy list': u'lista de proxy grátis',
        'socks5 proxy': u'proxy socks5',
        'socks4 proxy': u'proxy socks4',
        'http proxy': u'proxy http',
        'proxy server list': u'lista de servidores proxy',
        'anonymous proxy': u'proxy anônimo',
        'elite proxy': u'proxy elite',
        'fresh proxy': u'proxies novos',
        'working proxy': u'proxies funcionando',
        'proxy list updated': u'lista de proxy atualizada',
    },
    # German
    'de': {
        'free proxy list': u'kostenlose Proxy-Liste',
        'socks5 proxy': u'socks5 Proxy',
        'socks4 proxy': u'socks4 Proxy',
        'http proxy': u'http Proxy',
        'proxy server list': u'Proxy-Server-Liste',
        'anonymous proxy': u'anonymer Proxy',
        'elite proxy': u'Elite-Proxy',
        'fresh proxy': u'frische Proxys',
        'working proxy': u'funktionierende Proxys',
        'proxy list updated': u'aktualisierte Proxy-Liste',
    },
    # French
    'fr': {
        'free proxy list': u'liste de proxy gratuit',
        'socks5 proxy': u'proxy socks5',
        'socks4 proxy': u'proxy socks4',
        'http proxy': u'proxy http',
        'proxy server list': u'liste de serveurs proxy',
        'anonymous proxy': u'proxy anonyme',
        'elite proxy': u'proxy élite',
        'fresh proxy': u'proxies frais',
        'working proxy': u'proxies fonctionnels',
        'proxy list updated': u'liste de proxy mise à jour',
    },
    # Japanese
    'ja': {
        'free proxy list': u'無料プロキシリスト',
        'socks5 proxy': u'socks5プロキシ',
        'socks4 proxy': u'socks4プロキシ',
        'http proxy': u'httpプロキシ',
        'proxy server list': u'プロキシサーバーリスト',
        'anonymous proxy': u'匿名プロキシ',
        'elite proxy': u'エリートプロキシ',
        'fresh proxy': u'最新プロキシ',
        'working proxy': u'動作するプロキシ',
        'proxy list updated': u'プロキシリスト更新',
    },
    # Korean
    'ko': {
        'free proxy list': u'무료 프록시 목록',
        'socks5 proxy': u'socks5 프록시',
        'socks4 proxy': u'socks4 프록시',
        'http proxy': u'http 프록시',
        'proxy server list': u'프록시 서버 목록',
        'anonymous proxy': u'익명 프록시',
        'elite proxy': u'엘리트 프록시',
        'fresh proxy': u'최신 프록시',
        'working proxy': u'작동하는 프록시',
        'proxy list updated': u'프록시 목록 업데이트',
    },
    # Arabic
    'ar': {
        'free proxy list': u'قائمة بروكسي مجانية',
        'socks5 proxy': u'بروكسي socks5',
        'socks4 proxy': u'بروكسي socks4',
        'http proxy': u'بروكسي http',
        'proxy server list': u'قائمة خوادم البروكسي',
        'anonymous proxy': u'بروكسي مجهول',
        'elite proxy': u'بروكسي نخبة',
        'fresh proxy': u'بروكسي جديد',
        'working proxy': u'بروكسي يعمل',
        'proxy list updated': u'قائمة بروكسي محدثة',
    },
    # Indonesian
    'id': {
        'free proxy list': u'daftar proxy gratis',
        'socks5 proxy': u'proxy socks5',
        'socks4 proxy': u'proxy socks4',
        'http proxy': u'proxy http',
        'proxy server list': u'daftar server proxy',
        'anonymous proxy': u'proxy anonim',
        'elite proxy': u'proxy elite',
        'fresh proxy': u'proxy baru',
        'working proxy': u'proxy aktif',
        'proxy list updated': u'daftar proxy diperbarui',
    },
    # Turkish
    'tr': {
        'free proxy list': u'ücretsiz proxy listesi',
        'socks5 proxy': u'socks5 proxy',
        'socks4 proxy': u'socks4 proxy',
        'http proxy': u'http proxy',
        'proxy server list': u'proxy sunucu listesi',
        'anonymous proxy': u'anonim proxy',
        'elite proxy': u'elit proxy',
        'fresh proxy': u'güncel proxy',
        'working proxy': u'çalışan proxy',
        'proxy list updated': u'güncellenmiş proxy listesi',
    },
    # Vietnamese
    'vi': {
        'free proxy list': u'danh sách proxy miễn phí',
        'socks5 proxy': u'proxy socks5',
        'socks4 proxy': u'proxy socks4',
        'http proxy': u'proxy http',
        'proxy server list': u'danh sách máy chủ proxy',
        'anonymous proxy': u'proxy ẩn danh',
        'elite proxy': u'proxy cao cấp',
        'fresh proxy': u'proxy mới',
        'working proxy': u'proxy hoạt động',
        'proxy list updated': u'danh sách proxy cập nhật',
    },
    # Thai
    'th': {
        'free proxy list': u'รายการพร็อกซี่ฟรี',
        'socks5 proxy': u'พร็อกซี่ socks5',
        'socks4 proxy': u'พร็อกซี่ socks4',
        'http proxy': u'พร็อกซี่ http',
        'proxy server list': u'รายการเซิร์ฟเวอร์พร็อกซี่',
        'anonymous proxy': u'พร็อกซี่นิรนาม',
        'elite proxy': u'พร็อกซี่ระดับสูง',
        'fresh proxy': u'พร็อกซี่ใหม่',
        'working proxy': u'พร็อกซี่ใช้งานได้',
        'proxy list updated': u'รายการพร็อกซี่อัพเดท',
    },
    # Polish
    'pl': {
        'free proxy list': u'darmowa lista proxy',
        'socks5 proxy': u'proxy socks5',
        'socks4 proxy': u'proxy socks4',
        'http proxy': u'proxy http',
        'proxy server list': u'lista serwerów proxy',
        'anonymous proxy': u'anonimowe proxy',
        'elite proxy': u'elitarne proxy',
        'fresh proxy': u'świeże proxy',
        'working proxy': u'działające proxy',
        'proxy list updated': u'zaktualizowana lista proxy',
    },
    # Ukrainian
    'uk': {
        'free proxy list': u'безкоштовний список проксі',
        'socks5 proxy': u'socks5 проксі',
        'socks4 proxy': u'socks4 проксі',
        'http proxy': u'http проксі',
        'proxy server list': u'список проксі серверів',
        'anonymous proxy': u'анонімний проксі',
        'elite proxy': u'елітний проксі',
        'fresh proxy': u'свіжі проксі',
        'working proxy': u'робочі проксі',
        'proxy list updated': u'оновлений список проксі',
    },
}
|
||
|
||
# Language codes known without contacting the API: every statically
# translated language plus English.
LANGUAGES = list(STATIC_TRANSLATIONS) + ['en']

# Target languages reported by the LibreTranslate server. The set is filled
# by _fetch_available_languages(); the flag records that a fetch was already
# attempted.
_libretranslate_langs_checked = False
_libretranslate_langs = set()

# Cache of API translations, kept in memory and mirrored to a JSON file.
_cache_file = 'translation_cache.json'
_cache_max_age = 30 * 24 * 60 * 60  # 30 days, in seconds
_translation_cache = {}

# cache_key -> time of the last failed API call, so that a failing
# translation is not retried on every request.
_failed_cache_ttl = 60 * 60  # 1 hour, in seconds, before a retry is allowed
_failed_translations = {}
|
||
|
||
|
||
def set_config(config):
    """Configure translation settings from a config object.

    Reads ``config.scraper.libretranslate_url`` and
    ``config.scraper.libretranslate_enabled`` when they exist; attributes
    that are missing leave the current module-level value untouched. When
    LibreTranslate ends up enabled, the languages offered by the API are
    fetched and the result is logged.

    Args:
        config: Config object, optionally carrying a ``scraper`` attribute
            with ``libretranslate_url`` and ``libretranslate_enabled``.
    """
    global _libretranslate_url, _libretranslate_enabled
    global _libretranslate_langs, _libretranslate_langs_checked

    scraper = getattr(config, 'scraper', None)
    if scraper is not None:
        if hasattr(scraper, 'libretranslate_url'):
            new_url = scraper.libretranslate_url
            if new_url != _libretranslate_url:
                # A language list fetched earlier belongs to the previous
                # server; forget it so the fetch below queries the new URL
                # instead of returning early.
                _libretranslate_langs = set()
                _libretranslate_langs_checked = False
            _libretranslate_url = new_url
        if hasattr(scraper, 'libretranslate_enabled'):
            _libretranslate_enabled = scraper.libretranslate_enabled

    if _libretranslate_enabled:
        _fetch_available_languages()
        _log('LibreTranslate: enabled (%s) - %d languages' % (
            _libretranslate_url, len(_libretranslate_langs)
        ), 'info')
    else:
        _log('LibreTranslate: disabled', 'debug')
|
||
|
||
|
||
def _normalize_lang_codes(codes):
    """Map LibreTranslate language codes onto the codes used by this module.

    Every code starting with ``zh`` collapses to ``zh``, falsy items are
    dropped and ``en`` is removed (it is the source language).

    Args:
        codes: Iterable of language-code strings; falsy items are ignored.

    Returns:
        set of normalized language codes without ``en``.
    """
    normalized = set()
    for code in codes:
        if not code:
            continue
        normalized.add('zh' if code.startswith('zh') else code)
    normalized.discard('en')
    return normalized


def _fetch_available_languages():
    """Fetch the languages offered by the LibreTranslate server.

    Queries the ``/languages`` endpoint next to the configured translate
    URL. If the entry for English lists target languages, those are used;
    otherwise every language code in the response is used. The result is
    stored in ``_libretranslate_langs``.

    The "checked" flag is set before the request is made, so a failed fetch
    leaves an empty set and is not retried until the flag is reset.
    """
    global _libretranslate_langs, _libretranslate_langs_checked

    if _libretranslate_langs_checked:
        return

    _libretranslate_langs_checked = True

    try:
        import urllib2

        # Swap the last path component of the translate URL for
        # 'languages'. A trailing slash is stripped first so that
        # '.../translate/' resolves to the same endpoint as '.../translate'.
        base_url = _libretranslate_url.rstrip('/').rsplit('/', 1)[0]
        languages_url = base_url + '/languages'

        req = urllib2.Request(languages_url)
        req.add_header('Accept', 'application/json')
        req.add_header('User-Agent', 'PPF/1.0')
        resp = urllib2.urlopen(req, timeout=_libretranslate_timeout)
        try:
            langs = json.loads(resp.read())
        finally:
            # Release the connection even when the body is not valid JSON.
            resp.close()

        # Targets reachable from English, if the server reports them.
        en_targets = []
        for lang in langs:
            if lang.get('code') == 'en':
                en_targets = lang.get('targets') or []
                break

        if en_targets:
            _libretranslate_langs = _normalize_lang_codes(en_targets)
        else:
            # No target list for English: fall back to every listed code.
            _libretranslate_langs = _normalize_lang_codes(
                lang.get('code') for lang in langs
            )

        _log('LibreTranslate languages: %s' % ', '.join(sorted(_libretranslate_langs)), 'debug')

    except Exception as e:
        # Best effort: any network or parsing problem simply means that no
        # API languages are available.
        _log('failed to fetch LibreTranslate languages: %s' % str(e), 'warn')
        _libretranslate_langs = set()
|
||
|
||
|
||
def _load_cache():
    """Load the translation cache from disk into ``_translation_cache``.

    Two file layouts are accepted: the current one, a JSON object with
    ``_meta`` and ``translations`` keys, and the older one, where the whole
    object is the translation mapping. A missing file leaves the cache
    untouched; an unreadable, invalid or non-mapping file resets the cache
    to an empty dict.
    """
    global _translation_cache

    if not os.path.exists(_cache_file):
        return

    try:
        with open(_cache_file, 'r') as f:
            data = json.load(f)
    except (IOError, ValueError) as e:
        _log('cache load failed: %s' % str(e), 'debug')
        _translation_cache = {}
        return

    if isinstance(data, dict) and '_meta' in data:
        data = data.get('translations', {})

    if not isinstance(data, dict):
        # The rest of the module does key lookups and item assignment on
        # the cache, so anything but a mapping (e.g. a JSON list or null)
        # must not be installed.
        _log('cache load failed: %s' % 'unexpected cache format', 'debug')
        data = {}

    _translation_cache = data
|
||
|
||
|
||
def _save_cache():
    """Write the translation cache to ``_cache_file`` as UTF-8 JSON.

    The file holds a ``_meta`` object (format version, save time, entry
    count) and the ``translations`` mapping. Failures are logged at debug
    level and otherwise ignored.
    """
    import io

    try:
        data = {
            '_meta': {
                'version': 1,
                'updated': int(time.time()),
                'count': len(_translation_cache)
            },
            'translations': _translation_cache
        }
        # With ensure_ascii=False the serializer emits unicode text as soon
        # as a translation contains non-ASCII characters. Writing that to a
        # byte-mode file under Python 2 raises UnicodeEncodeError, so the
        # text is encoded explicitly through a UTF-8 text file instead.
        payload = json.dumps(data, ensure_ascii=False, indent=2)
        if isinstance(payload, bytes):
            payload = payload.decode('utf-8')
        with io.open(_cache_file, 'w', encoding='utf-8') as f:
            f.write(payload)
    except (IOError, OSError, UnicodeError) as e:
        _log('cache save failed: %s' % str(e), 'debug')
|
||
|
||
|
||
def translate_libretranslate(text, target_lang, source_lang='en', api_url=None):
    """Translate text using the LibreTranslate API.

    Results are cached in ``_translation_cache`` (and saved to disk) with a
    timestamp; entries older than ``_cache_max_age`` are refreshed. Failed
    requests are remembered for ``_failed_cache_ttl`` seconds and not
    retried during that time.

    Args:
        text: Text to translate
        target_lang: Target language code (e.g., 'ru', 'zh')
        source_lang: Source language code (default: 'en')
        api_url: LibreTranslate API URL (uses configured default if None)

    Returns:
        Translated text, or None when the API is disabled, the target
        language is not offered, or the request failed.
    """
    if not _libretranslate_enabled:
        return None

    if api_url is None:
        api_url = _libretranslate_url

    # The request uses 'zh-Hans' while the language set and the cache keys
    # use the short code 'zh'.
    # NOTE(review): a server that advertises plain 'zh' may not accept
    # 'zh-Hans' - confirm against the deployed LibreTranslate version.
    api_target = 'zh-Hans' if target_lang == 'zh' else target_lang

    # Fetch the language list lazily if nobody has done so yet.
    if not _libretranslate_langs_checked:
        _fetch_available_languages()

    if target_lang not in _libretranslate_langs:
        return None

    cache_key = '%s:%s:%s' % (source_lang, target_lang, text)

    if cache_key in _translation_cache:
        cached = _translation_cache[cache_key]
        if not isinstance(cached, dict):
            # Old cache format: the bare translated string, never expires.
            return cached
        if time.time() - cached.get('time', 0) < _cache_max_age:
            return cached.get('text')
        # Expired entry: fall through and translate again.

    failed_at = _failed_translations.get(cache_key)
    if failed_at is not None and time.time() - failed_at < _failed_cache_ttl:
        return None

    try:
        import urllib2
        data = json.dumps({
            'q': text,
            'source': source_lang,
            'target': api_target,
            'format': 'text',
        })
        req = urllib2.Request(api_url, data)
        req.add_header('Content-Type', 'application/json')
        req.add_header('User-Agent', 'PPF/1.0')
        resp = urllib2.urlopen(req, timeout=_libretranslate_timeout)
        try:
            result = json.loads(resp.read())
        finally:
            # Release the connection even when the body is not valid JSON.
            resp.close()

        if not isinstance(result, dict) or 'translatedText' not in result:
            # Treat a reply without a translation like any other failure so
            # that it is rate-limited through _failed_translations too.
            raise ValueError('no translatedText in response: %r' % (result,))

        translated = result['translatedText']
        _log('translated [%s]: %s -> %s' % (target_lang, text, translated), 'debug')

    except Exception as e:
        _failed_translations[cache_key] = time.time()
        _log('translation failed [%s->%s]: %s' % (source_lang, target_lang, str(e)), 'debug')
        return None

    # Store with timestamp for cache expiry.
    _translation_cache[cache_key] = {
        'text': translated,
        'time': int(time.time())
    }
    _failed_translations.pop(cache_key, None)

    try:
        _save_cache()
    except Exception as e:
        # Persisting is best effort: a write problem must not turn a
        # successful translation into a failure.
        _log('cache save failed: %s' % str(e), 'debug')

    return translated
|
||
|
||
|
||
def get_cache_stats():
    """Report the current state of the translation cache and API settings.

    Returns:
        dict with the number of cached entries, the number of remembered
        failures, the cache file name, the enabled flag, the API URL and
        the number of languages reported by the API.
    """
    cached_entries = len(_translation_cache)
    remembered_failures = len(_failed_translations)
    api_language_count = len(_libretranslate_langs)
    return {
        'entries': cached_entries,
        'failed_pending': remembered_failures,
        'file': _cache_file,
        'enabled': _libretranslate_enabled,
        'url': _libretranslate_url,
        'api_languages': api_language_count,
    }
|
||
|
||
|
||
def get_translated_term(term=None, lang=None, use_api=True):
    """Get a search term, optionally translated.

    Lookup order for a non-English language: the static table, then the
    LibreTranslate API (if allowed and the language is offered), then an
    English fallback.

    Args:
        term: Specific term to translate (or random if None)
        lang: Target language (or random if None)
        use_api: Whether to use LibreTranslate API for missing translations

    Returns:
        (term, lang) tuple. ``lang`` is 'en' when no translation was
        available; in that case the term is the requested one if a term
        was given, otherwise a random English term.
    """
    requested_term = term

    # Expand language list to include LibreTranslate-only languages.
    all_langs = list(set(LANGUAGES) | _libretranslate_langs)

    if lang is None:
        lang = random.choice(all_langs) if all_langs else 'en'

    if lang == 'en':
        # English can draw from the full term list.
        if term is None:
            term = random.choice(BASE_TERMS)
        return term, lang

    # Other languages draw from the translatable subset.
    if term is None:
        term = random.choice(TRANSLATABLE_TERMS)

    # Static translations first: no network involved.
    translations = STATIC_TRANSLATIONS.get(lang, {})
    if term in translations:
        return translations[term], lang

    # Then the API, for anything the static table does not cover.
    if use_api and _libretranslate_enabled and lang in _libretranslate_langs:
        translated = translate_libretranslate(term, lang)
        if translated:
            return translated, lang

    # No translation available. A caller that asked for a specific term
    # gets that term back untranslated rather than an unrelated one.
    if requested_term is not None:
        return requested_term, 'en'
    return random.choice(BASE_TERMS), 'en'
|
||
|
||
|
||
def get_random_search_term():
    """Pick a random search term in a random language.

    Returns:
        The term string only; the language chosen by get_translated_term()
        is discarded.
    """
    return get_translated_term()[0]
|
||
|
||
|
||
def get_all_terms_for_language(lang):
    """List the search terms available for one language.

    Args:
        lang: Language code

    Returns:
        A new list: the static translations for ``lang`` when the language
        has a static table, otherwise (including for 'en') a copy of the
        English BASE_TERMS.
    """
    if lang != 'en' and lang in STATIC_TRANSLATIONS:
        return list(STATIC_TRANSLATIONS[lang].values())
    return BASE_TERMS[:]
|
||
|
||
|
||
def get_mixed_terms(count=5, english_weight=0.3):
    """Build a list of search terms drawn from several languages.

    Args:
        count: Number of terms to return
        english_weight: Probability (0..1) that a given slot is filled
            with a random English base term instead of a term in a random
            language

    Returns:
        List of ``count`` search terms
    """
    def _pick():
        # One draw decides the source, a second one picks the term.
        if random.random() < english_weight:
            return random.choice(BASE_TERMS)
        return get_random_search_term()

    return [_pick() for _ in range(count)]
|
||
|
||
|
||
# Load cache on module import
_load_cache()


if __name__ == '__main__':
    # Manual smoke test: prints the configuration, a few static
    # translations, optionally exercises the API (--test-api) and finally
    # prints a random mix of terms.
    import sys

    def _printable(value):
        """Return value UTF-8 encoded if it is unicode, else unchanged."""
        if isinstance(value, unicode):
            return value.encode('utf-8')
        return value

    # Fetch available languages from API
    _fetch_available_languages()

    # Test output
    print('LibreTranslate: %s' % ('enabled' if _libretranslate_enabled else 'disabled'))
    print('API URL: %s' % _libretranslate_url)
    print('Static languages: %s' % ', '.join(sorted(STATIC_TRANSLATIONS.keys())))
    api_only = _libretranslate_langs - set(STATIC_TRANSLATIONS.keys())
    print('API-only languages: %s' % (', '.join(sorted(api_only)) if api_only else 'none'))
    print('')

    # Cache stats
    stats = get_cache_stats()
    print('Cache: %d entries in %s' % (stats['entries'], stats['file']))
    print('')

    print('Sample static translations:')
    for lang in sorted(STATIC_TRANSLATIONS.keys())[:5]:
        term, _ = get_translated_term('free proxy list', lang, use_api=False)
        print(' [%s] %s' % (lang, _printable(term)))

    print('')

    # Test LibreTranslate if --test-api flag
    if '--test-api' in sys.argv:
        print('Testing LibreTranslate API...')
        # Use languages that are API-available but not in static translations
        test_langs = list(api_only)[:5] if api_only else ['fr', 'ar']
        for lang in test_langs:
            term, result_lang = get_translated_term('free proxy list', lang, use_api=True)
            print(' [%s] %s' % (result_lang, _printable(term)))
        print('')
        stats = get_cache_stats()
        print('Cache after API test: %d entries' % stats['entries'])
    else:
        print('Run with --test-api to test LibreTranslate API')

    print('')
    print('Random mixed terms:')
    for term in get_mixed_terms(10, english_weight=0.2):
        # A single formatted argument: without print_function, Python 2
        # would print the tuple repr of print(' ', term).
        print('  %s' % _printable(term))
|