ppf: add retrievals field so we know whether a URL is new
To update an existing urls.sqlite, first run sqlite3 urls.sqlite "alter table uris add retrievals INT" and then run sqlite3 urls.sqlite "update uris set retrievals=0"
This commit is contained in:
1
dbs.py
1
dbs.py
@@ -19,6 +19,7 @@ def create_table_if_not_exists(sqlite, dbname):
|
||||
check_time INT,
|
||||
error INT,
|
||||
stale_count INT,
|
||||
retrievals INT,
|
||||
added INT
|
||||
)""")
|
||||
|
||||
|
||||
11
ppf.py
11
ppf.py
@@ -152,14 +152,14 @@ def insert_proxies(proxydb, proxies, url):
|
||||
_log('+%d item(s) from %s' % (len(proxies), url), 'added')
|
||||
|
||||
|
||||
def proxyleech(proxydb, urldb, url, stale_count, error):
|
||||
def proxyleech(proxydb, urldb, url, stale_count, error, retrievals):
|
||||
try: content = fetch_contents(url)
|
||||
except KeyboardInterrupt as e: raise e
|
||||
except: content = ''
|
||||
|
||||
unique_count, new = extract_proxies(content)
|
||||
|
||||
if stale_count == 0 and error == 0: # new site
|
||||
if retrievals == 0: # new site
|
||||
if content != '' and unique_count == 0: # site works but has zero proxy addresses
|
||||
error = 99999
|
||||
else:
|
||||
@@ -170,9 +170,10 @@ def proxyleech(proxydb, urldb, url, stale_count, error):
|
||||
if content == '':
|
||||
error += 1
|
||||
else:
|
||||
retrievals += 1
|
||||
error = 0
|
||||
|
||||
urldb.execute('UPDATE uris SET error=?,stale_count=?,check_time=? where url=?', (error, stale_count, int(time.time()), url))
|
||||
urldb.execute('UPDATE uris SET error=?,stale_count=?,check_time=?,retrievals=? where url=?', (error, stale_count, int(time.time()), retrievals, url))
|
||||
urldb.commit()
|
||||
|
||||
if not len(new): return
|
||||
@@ -247,10 +248,10 @@ if __name__ == '__main__':
|
||||
while True:
|
||||
try:
|
||||
## any site that needs to be checked ?
|
||||
rows = urldb.execute('SELECT url,stale_count,error FROM uris WHERE error < ? and (check_time+?+((error+stale_count)*?) <?) ORDER BY RANDOM() LIMIT 25', (config.ppf.max_fail, config.ppf.checktime, config.ppf.perfail_checktime, int(time.time()))).fetchall()
|
||||
rows = urldb.execute('SELECT url,stale_count,error,retrievals FROM uris WHERE error < ? and (check_time+?+((error+stale_count)*?) <?) ORDER BY RANDOM() LIMIT 25', (config.ppf.max_fail, config.ppf.checktime, config.ppf.perfail_checktime, int(time.time()))).fetchall()
|
||||
|
||||
for row in rows:
|
||||
proxyleech(proxydb, urldb, row[0], row[1], row[2])
|
||||
proxyleech(proxydb, urldb, row[0], row[1], row[2], row[3])
|
||||
|
||||
## search for new website during free time
|
||||
if config.ppf.search: proxyfind(urldb)
|
||||
|
||||
Reference in New Issue
Block a user