Compare commits
2 Commits
028be1631d
...
1440378c20
Author | SHA1 | Date |
---|---|---|
ibu | 1440378c20 | |
ibu | 8246ce6251 |
|
@ -76,10 +76,10 @@ elasticsearch:
|
||||||
# host on which ES is running
|
# host on which ES is running
|
||||||
host: localhost
|
host: localhost
|
||||||
# API key for accessing ES
|
# API key for accessing ES
|
||||||
api_key: "**********************"
|
api_key: "____________________"
|
||||||
# API user id
|
# API user id
|
||||||
id: "**********************"
|
id: "____________________"
|
||||||
# Index base name (actual index names will have '_text' etc. appended)
|
# Index base name (full index names will have '_text_{language}' appended)
|
||||||
index_base_name: atext
|
index_base_name: atext
|
||||||
|
|
||||||
# Tensorflow access
|
# Tensorflow access
|
||||||
|
|
|
@ -99,14 +99,19 @@ async def get_site_path(
|
||||||
Return the next path of a given site that needs to be processed.
|
Return the next path of a given site that needs to be processed.
|
||||||
|
|
||||||
If none needs to be processed, return None.
|
If none needs to be processed, return None.
|
||||||
|
In particular, for sites having crawl_enabled=false, return None.
|
||||||
|
|
||||||
Only return paths that have last been visited before *before*
|
Only return paths that have last been visited before *before*
|
||||||
or not been processed at all. Paths with a ok_count of -3 or lower
|
or not been processed at all. Paths with an ok_count of -3 or lower
|
||||||
are dropped.
|
are dropped.
|
||||||
|
|
||||||
If *only_new*, limit to paths that have not been processed at all,
|
If *only_new*, limit to paths that have not been processed at all,
|
||||||
irrespective of the value of *before*.
|
irrespective of the value of *before*.
|
||||||
"""
|
"""
|
||||||
|
sql = "SELECT crawl_enabled FROM site WHERE id=$1"
|
||||||
|
crawl_enabled = await conn.fetchval(sql, site.id_)
|
||||||
|
if not crawl_enabled:
|
||||||
|
return None
|
||||||
if only_new:
|
if only_new:
|
||||||
sql = (
|
sql = (
|
||||||
"SELECT * FROM site_path"
|
"SELECT * FROM site_path"
|
||||||
|
|
Loading…
Reference in New Issue