ppf: add retrievals field so we know whether a URL is new

To migrate an existing database, run:

sqlite3 urls.sqlite "alter table uris add retrievals INT"
sqlite3 urls.sqlite "update uris set retrievals=0"
This commit is contained in:
rofl0r
2019-01-12 16:07:56 +00:00
committed by mickael
parent bc41bad9de
commit 69d366f7eb
2 changed files with 7 additions and 5 deletions

1
dbs.py
View File

@@ -19,6 +19,7 @@ def create_table_if_not_exists(sqlite, dbname):
check_time INT,
error INT,
stale_count INT,
retrievals INT,
added INT
)""")

11
ppf.py
View File

@@ -152,14 +152,14 @@ def insert_proxies(proxydb, proxies, url):
_log('+%d item(s) from %s' % (len(proxies), url), 'added')
def proxyleech(proxydb, urldb, url, stale_count, error):
def proxyleech(proxydb, urldb, url, stale_count, error, retrievals):
try: content = fetch_contents(url)
except KeyboardInterrupt as e: raise e
except: content = ''
unique_count, new = extract_proxies(content)
if stale_count == 0 and error == 0: # new site
if retrievals == 0: # new site
if content != '' and unique_count == 0: # site works but has zero proxy addresses
error = 99999
else:
@@ -170,9 +170,10 @@ def proxyleech(proxydb, urldb, url, stale_count, error):
if content == '':
error += 1
else:
retrievals += 1
error = 0
urldb.execute('UPDATE uris SET error=?,stale_count=?,check_time=? where url=?', (error, stale_count, int(time.time()), url))
urldb.execute('UPDATE uris SET error=?,stale_count=?,check_time=?,retrievals=? where url=?', (error, stale_count, int(time.time()), retrievals, url))
urldb.commit()
if not len(new): return
@@ -247,10 +248,10 @@ if __name__ == '__main__':
while True:
try:
## any site that needs to be checked ?
rows = urldb.execute('SELECT url,stale_count,error FROM uris WHERE error < ? and (check_time+?+((error+stale_count)*?) <?) ORDER BY RANDOM() LIMIT 25', (config.ppf.max_fail, config.ppf.checktime, config.ppf.perfail_checktime, int(time.time()))).fetchall()
rows = urldb.execute('SELECT url,stale_count,error,retrievals FROM uris WHERE error < ? and (check_time+?+((error+stale_count)*?) <?) ORDER BY RANDOM() LIMIT 25', (config.ppf.max_fail, config.ppf.checktime, config.ppf.perfail_checktime, int(time.time()))).fetchall()
for row in rows:
proxyleech(proxydb, urldb, row[0], row[1], row[2])
proxyleech(proxydb, urldb, row[0], row[1], row[2], row[3])
## search for new website during free time
if config.ppf.search: proxyfind(urldb)