""" Plugin for filtering paths of a site to be retrieved. This plugin implements :func:`sp_filter`. """ def sp_filter(site, path, robots) -> bool: """ Per-site path filter. Return whether the path shall be retrieved. """ if not robots.can_fetch_url(site.base_url + path): return False if 'amusewiki' in site.meta_info.get('generator', '').lower(): if any( [ path.endswith(end) for end in ('.html', '.epub', '.tex', '.zip', '.pdf') ] ): return False if '/bbselect?' in path: return False return True