From e4daeb15d22f348454bcca08806623bea20780c3 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?ibu=20=E2=98=89=20radempa?=
Date: Thu, 31 Dec 2020 16:26:36 +0000
Subject: [PATCH] Improve search.

* do not only return the 10 items from the first page, but loop over at most `max_pages` pages
* use https://usa.anarchistlibraries.net/ as fallback when the main site is not available
* drop formats TXT, TEX, MUSE from search results as they cannot be displayed in calibre
* on python3 use quote_plus instead of quote
* obtain the version number in the user agent string from module TheAnarchistLibraryStore
---
Review notes (ignored by `git am`):

* search() is a generator and yields from inside the `try` block. A bare
  `except:` there would also catch GeneratorExit when the caller closes the
  generator, triggering a request to the fallback site and a possible
  "generator ignored GeneratorExit" RuntimeError. The handler therefore
  catches `Exception` only.
* search() still accepts `max_results` but no longer reads it; the number of
  results is bounded by `max_pages` instead.

 .../theanarchistlibrary_plugin.py             | 53 +++++++++++++------
 1 file changed, 38 insertions(+), 15 deletions(-)

diff --git a/theanarchistlibrary_store/theanarchistlibrary_plugin.py b/theanarchistlibrary_store/theanarchistlibrary_plugin.py
index 3b9ff3e..49b62ea 100644
--- a/theanarchistlibrary_store/theanarchistlibrary_plugin.py
+++ b/theanarchistlibrary_store/theanarchistlibrary_plugin.py
@@ -1,24 +1,37 @@
 __license__ = 'GPL 3'
-__copyright__ = '2012, Ruben Pollan '
+__copyright__ = '2012, Ruben Pollan ; 2020, ibu radempa '
 __docformat__ = 'restructuredtext en'
 
+import json
 try:
-    from urllib.parse import quote
+    from urllib.parse import quote_plus as quote
 except:
     from urllib2 import quote
 try:
     from PyQt5.Qt import QUrl
 except:
     from PyQt4.Qt import QUrl
-
 from contextlib import closing
-import json
 from calibre import browser
 from calibre.gui2 import open_url
 from calibre.gui2.store import StorePlugin
 from calibre.gui2.store.basic_config import BasicStoreConfig
 from calibre.gui2.store.search_result import SearchResult
 from calibre.gui2.store.web_store_dialog import WebStoreDialog
+from . import TheAnarchistLibraryStore
+
+
+url1 = 'https://theanarchistlibrary.org/search?fmt=json&page=%s&query=%s'
+url2 = 'https://usa.anarchistlibraries.net/search?fmt=json&page=%s&query=%s'
+"""Search URLs. If the library has no fallback url, set url2 = None."""
+
+
+max_pages = 10
+"""Page limit. (amusewiki gives us 10 results per page.)"""
+
+
+user_agent = 'Calibre plugin calibre-tal v' + '{}.{}.{}'.format(*TheAnarchistLibraryStore.version)
+
 
 class TheAnarchistLibraryStore(BasicStoreConfig, StorePlugin):
 
@@ -33,14 +46,28 @@ class TheAnarchistLibraryStore(BasicStoreConfig, StorePlugin):
             d.set_tags(self.config.get('tags', ''))
             d.exec_()
 
-    def search(self, query, max_results=10, timeout=60):
-        url = 'http://theanarchistlibrary.org/search?fmt=json&query=' + quote(query)
+    def search(self, query, max_results=10, timeout=10):
+        br = browser(user_agent=user_agent)
+        page = 0
+        while page < max_pages:
+            page += 1
+            try:
+                for result in self._iter_search_results(br, url1, page, query, timeout):
+                    if result is False:
+                        return
+                    yield result
+            except Exception:  # not bare: GeneratorExit must not trigger the fallback
+                if url2:
+                    for result in self._iter_search_results(br, url2, page, query, timeout):
+                        if result is False:
+                            return
+                        yield result
 
-        br = browser()
-
-        counter = max_results
-        with closing(br.open(url, timeout=timeout)) as f:
+    def _iter_search_results(self, br, url, page, query, timeout):
+        with closing(br.open(url % (page, quote(query)), timeout=timeout)) as f:
             doc = json.load(f)
+            if not doc:
+                yield False
             for data in doc:
                 s = SearchResult()
                 s.title = data['title'].strip()
@@ -52,9 +79,5 @@ class TheAnarchistLibraryStore(BasicStoreConfig, StorePlugin):
                 s.downloads['PDF'] = data['url'].strip() + '.pdf'
                 s.downloads['A4.PDF'] = data['url'].strip() + '.a4.pdf'
                 s.downloads['LT.PDF'] = data['url'].strip() + '.lt.pdf'
-                s.downloads['TXT'] = data['url'].strip() + '.txt'
-                s.downloads['TEX'] = data['url'].strip() + '.tex'
-                s.downloads['MUSE'] = data['url'].strip() + '.muse'
-                s.formats = 'EPUB, PDF, A4.PDF, LT.PDF, TXT, TEX, MUSE'
-
+                s.formats = 'EPUB, PDF, A4.PDF, LT.PDF'
                 yield s