ppf: add retrievals field so we know whether a URL is new
To migrate an existing database, first run sqlite3 urls.sqlite "alter table uris add retrievals INT" and then run sqlite3 urls.sqlite "update uris set retrievals=0"
This commit is contained in:
1
dbs.py
1
dbs.py
@@ -19,6 +19,7 @@ def create_table_if_not_exists(sqlite, dbname):
|
|||||||
check_time INT,
|
check_time INT,
|
||||||
error INT,
|
error INT,
|
||||||
stale_count INT,
|
stale_count INT,
|
||||||
|
retrievals INT,
|
||||||
added INT
|
added INT
|
||||||
)""")
|
)""")
|
||||||
|
|
||||||
|
|||||||
11
ppf.py
11
ppf.py
@@ -152,14 +152,14 @@ def insert_proxies(proxydb, proxies, url):
|
|||||||
_log('+%d item(s) from %s' % (len(proxies), url), 'added')
|
_log('+%d item(s) from %s' % (len(proxies), url), 'added')
|
||||||
|
|
||||||
|
|
||||||
def proxyleech(proxydb, urldb, url, stale_count, error):
|
def proxyleech(proxydb, urldb, url, stale_count, error, retrievals):
|
||||||
try: content = fetch_contents(url)
|
try: content = fetch_contents(url)
|
||||||
except KeyboardInterrupt as e: raise e
|
except KeyboardInterrupt as e: raise e
|
||||||
except: content = ''
|
except: content = ''
|
||||||
|
|
||||||
unique_count, new = extract_proxies(content)
|
unique_count, new = extract_proxies(content)
|
||||||
|
|
||||||
if stale_count == 0 and error == 0: # new site
|
if retrievals == 0: # new site
|
||||||
if content != '' and unique_count == 0: # site works but has zero proxy addresses
|
if content != '' and unique_count == 0: # site works but has zero proxy addresses
|
||||||
error = 99999
|
error = 99999
|
||||||
else:
|
else:
|
||||||
@@ -170,9 +170,10 @@ def proxyleech(proxydb, urldb, url, stale_count, error):
|
|||||||
if content == '':
|
if content == '':
|
||||||
error += 1
|
error += 1
|
||||||
else:
|
else:
|
||||||
|
retrievals += 1
|
||||||
error = 0
|
error = 0
|
||||||
|
|
||||||
urldb.execute('UPDATE uris SET error=?,stale_count=?,check_time=? where url=?', (error, stale_count, int(time.time()), url))
|
urldb.execute('UPDATE uris SET error=?,stale_count=?,check_time=?,retrievals=? where url=?', (error, stale_count, int(time.time()), retrievals, url))
|
||||||
urldb.commit()
|
urldb.commit()
|
||||||
|
|
||||||
if not len(new): return
|
if not len(new): return
|
||||||
@@ -247,10 +248,10 @@ if __name__ == '__main__':
|
|||||||
while True:
|
while True:
|
||||||
try:
|
try:
|
||||||
## any site that needs to be checked ?
|
## any site that needs to be checked ?
|
||||||
rows = urldb.execute('SELECT url,stale_count,error FROM uris WHERE error < ? and (check_time+?+((error+stale_count)*?) <?) ORDER BY RANDOM() LIMIT 25', (config.ppf.max_fail, config.ppf.checktime, config.ppf.perfail_checktime, int(time.time()))).fetchall()
|
rows = urldb.execute('SELECT url,stale_count,error,retrievals FROM uris WHERE error < ? and (check_time+?+((error+stale_count)*?) <?) ORDER BY RANDOM() LIMIT 25', (config.ppf.max_fail, config.ppf.checktime, config.ppf.perfail_checktime, int(time.time()))).fetchall()
|
||||||
|
|
||||||
for row in rows:
|
for row in rows:
|
||||||
proxyleech(proxydb, urldb, row[0], row[1], row[2])
|
proxyleech(proxydb, urldb, row[0], row[1], row[2], row[3])
|
||||||
|
|
||||||
## search for new website during free time
|
## search for new website during free time
|
||||||
if config.ppf.search: proxyfind(urldb)
|
if config.ppf.search: proxyfind(urldb)
|
||||||
|
|||||||
Reference in New Issue
Block a user