25 lines
638 B
Python
25 lines
638 B
Python
"""
|
|
Plugin for filtering paths of a site to be retrieved.
|
|
|
|
This plugin implements :func:`sp_filter`.
|
|
"""
|
|
|
|
|
|
def sp_filter(site, path, robots) -> bool:
    """
    Per-site path filter. Return whether the path shall be retrieved.

    A path is rejected when ``robots.can_fetch_url`` disallows the URL
    formed by ``site.base_url + path``.

    For sites whose ``generator`` meta info contains ``amusewiki``
    (compared case-insensitively), a path is also rejected when it ends
    in ``.html``, ``.epub``, ``.tex``, ``.zip`` or ``.pdf``, or when it
    contains ``/bbselect?``.

    :param site: object providing ``base_url`` (a string) and
        ``meta_info`` (a mapping with a ``get`` method)
    :param path: path that is appended to ``site.base_url``
    :param robots: object providing ``can_fetch_url(url)``
    :return: ``True`` if the path shall be retrieved, else ``False``
    """
    if not robots.can_fetch_url(site.base_url + path):
        return False

    # The generator entry may be absent or explicitly None; both cases
    # are treated like an empty string instead of raising on .lower().
    generator = site.meta_info.get('generator') or ''
    if 'amusewiki' not in generator.lower():
        return True

    # NOTE(review): presumably these suffixes are alternative download
    # formats of the same text on amusewiki sites -- confirm.
    # str.endswith accepts a tuple and matches any of its suffixes.
    if path.endswith(('.html', '.epub', '.tex', '.zip', '.pdf')):
        return False
    return '/bbselect?' not in path
|