Improve search.

* do not return only the 10 items from the first page; instead loop over at
  most `max_pages` pages
* use https://usa.anarchistlibraries.net/ as fallback when the main site is
  not available
* drop formats TXT, TEX, MUSE from search results as they cannot be displayed
  in calibre
* on Python 3, use `quote_plus` instead of `quote`
* obtain the version number in the user agent string from module
  TheAnarchistLibraryStore
This commit is contained in:
ibu ☉ radempa 2020-12-31 16:26:36 +00:00
parent fc96806830
commit e4daeb15d2
1 changed files with 38 additions and 15 deletions

View File

@ -1,24 +1,37 @@
__license__ = 'GPL 3'
__copyright__ = '2012, Ruben Pollan <meskio@sindominio.net>'
__copyright__ = '2012, Ruben Pollan <meskio@sindominio.net>; 2020, ibu radempa <ibu@radempa.de>'
__docformat__ = 'restructuredtext en'
import json
try:
from urllib.parse import quote
from urllib.parse import quote_plus as quote
except:
from urllib2 import quote
try:
from PyQt5.Qt import QUrl
except:
from PyQt4.Qt import QUrl
from contextlib import closing
import json
from calibre import browser
from calibre.gui2 import open_url
from calibre.gui2.store import StorePlugin
from calibre.gui2.store.basic_config import BasicStoreConfig
from calibre.gui2.store.search_result import SearchResult
from calibre.gui2.store.web_store_dialog import WebStoreDialog
from . import TheAnarchistLibraryStore
# Search endpoints returning JSON. Each template takes the page number and
# the URL-quoted query, in that order. url1 is tried first and url2 is the
# fallback; if the library has no fallback url, set url2 = None.
url1 = 'https://theanarchistlibrary.org/search?fmt=json&page=%s&query=%s'
url2 = 'https://usa.anarchistlibraries.net/search?fmt=json&page=%s&query=%s'

# Page limit. (amusewiki gives us 10 results per page.)
max_pages = 10

# User agent sent with every search request; the version components are
# taken from the TheAnarchistLibraryStore object imported from the package.
user_agent = 'Calibre plugin calibre-tal v{}.{}.{}'.format(
    *TheAnarchistLibraryStore.version
)
class TheAnarchistLibraryStore(BasicStoreConfig, StorePlugin):
@ -33,14 +46,28 @@ class TheAnarchistLibraryStore(BasicStoreConfig, StorePlugin):
d.set_tags(self.config.get('tags', ''))
d.exec_()
def search(self, query, max_results=10, timeout=60):
url = 'http://theanarchistlibrary.org/search?fmt=json&query=' + quote(query)
def search(self, query, max_results=10, timeout=10):
    """Yield search results for *query*, one result page after another.

    Pages 1 through ``max_pages`` are requested from ``url1``. If
    handling a page from ``url1`` raises an exception and ``url2`` is
    set, the same page is requested from ``url2`` instead. Iteration
    stops as soon as a page reports no results, which
    ``_iter_search_results`` signals by yielding ``False``.

    :param query: the search terms entered by the user.
    :param max_results: not consulted by this implementation; the number
        of results is bounded by ``max_pages`` pages.
    :param timeout: timeout passed on to every page request.
    :return: a generator of the objects produced by
        ``_iter_search_results``.
    :raises Exception: whatever the fallback request raises; only errors
        from the primary URL are absorbed.
    """
    br = browser(user_agent=user_agent)
    for page in range(1, max_pages + 1):
        try:
            for result in self._iter_search_results(br, url1, page, query, timeout):
                if result is False:
                    return
                yield result
        except Exception:
            # Deliberately not a bare ``except:``. The ``yield`` above sits
            # inside the ``try`` block, so closing the generator raises
            # GeneratorExit right there. Catching it would start a fallback
            # request and yield again, which Python reports as
            # "RuntimeError: generator ignored GeneratorExit".
            # KeyboardInterrupt and SystemExit must propagate as well.
            #
            # NOTE(review): results already yielded from url1 for this page
            # are not filtered out of the fallback results -- confirm
            # whether duplicates matter to the caller.
            if url2:
                for result in self._iter_search_results(br, url2, page, query, timeout):
                    if result is False:
                        return
                    yield result
br = browser()
counter = max_results
with closing(br.open(url, timeout=timeout)) as f:
def _iter_search_results(self, br, url, page, query, timeout):
with closing(br.open(url % (page, quote(query)), timeout=timeout)) as f:
doc = json.load(f)
if not doc:
yield False
for data in doc:
s = SearchResult()
s.title = data['title'].strip()
@ -52,9 +79,5 @@ class TheAnarchistLibraryStore(BasicStoreConfig, StorePlugin):
s.downloads['PDF'] = data['url'].strip() + '.pdf'
s.downloads['A4.PDF'] = data['url'].strip() + '.a4.pdf'
s.downloads['LT.PDF'] = data['url'].strip() + '.lt.pdf'
s.downloads['TXT'] = data['url'].strip() + '.txt'
s.downloads['TEX'] = data['url'].strip() + '.tex'
s.downloads['MUSE'] = data['url'].strip() + '.muse'
s.formats = 'EPUB, PDF, A4.PDF, LT.PDF, TXT, TEX, MUSE'
s.formats = 'EPUB, PDF, A4.PDF, LT.PDF'
yield s