Compare commits

..

No commits in common. "1440378c207866e98304abf0f6a084cd375e27cf" and "028be1631ded9038e6f23ae11c7502e6e3282cf3" have entirely different histories.

2 changed files with 4 additions and 9 deletions

View File

@@ -76,10 +76,10 @@ elasticsearch:
# host on which ES is running
host: localhost
# API key for accessing ES
api_key: "____________________"
api_key: "**********************"
# API user id
id: "____________________"
# Index base name (full index names will have '_text_{language}' appended)
id: "**********************"
# Index base name (actual index names will have '_text' etc. appended)
index_base_name: atext
# Tensorflow access

View File

@@ -99,19 +99,14 @@ async def get_site_path(
Return the next path of a given site that needs to be processed.
If none needs to be processed, return None.
In particular, for sites having crawl_enabled=false return None.
Only return paths that have last been visited before *before*
or not been processed at all. Paths with an ok_count of -3 or lower
or not been processed at all. Paths with a ok_count of -3 or lower
are dropped.
If *only_new*, limit to paths that have not been processed at all,
irrespective of the value of *before*.
"""
sql = "SELECT crawl_enabled FROM site WHERE id=$1"
crawl_enabled = await conn.fetchval(sql, site.id_)
if not crawl_enabled:
return None
if only_new:
sql = (
"SELECT * FROM site_path"