From a6af5b12d23650bb8942a562560f3ab8b535271e Mon Sep 17 00:00:00 2001
From: ibu
Date: Mon, 29 Nov 2021 09:16:31 +0000
Subject: [PATCH] Put under version control

---
 .gitignore | 51 +
 .pre-commit-config.yaml | 30 +
 Pipfile | 46 +
 Pipfile.lock | 1561 +++
 README.md | 13 +
 doc/Makefile | 20 +
 doc/source/conf.py | 71 +
 .../initial_data/seed_urls.list | 23 +
 doc/source/config_template/main.yaml | 88 +
 .../config_template/plugins/__init__.py | 0
 .../plugins/filter_resource_path.py | 22 +
 .../config_template/plugins/filter_site.py | 47 +
 .../plugins/filter_site_path.py | 24 +
 doc/source/devel/devel.md | 63 +
 doc/source/devel/related_work.md | 64 +
 doc/source/devel/todo.md | 77 +
 doc/source/development.rst | 9 +
 doc/source/elasticsearch.md | 119 +
 doc/source/index.rst | 37 +
 doc/source/installation.md | 122 +
 doc/source/introduction.md | 66 +
 doc/source/maintenance.md | 23 +
 doc/source/tensorflow_model_server.md | 98 +
 license.txt | 48 +
 pyproject.toml | 10 +
 src/atextcrawler/__init__.py | 0
 src/atextcrawler/__main__.py | 12 +
 src/atextcrawler/application.py | 204 +
 src/atextcrawler/assets/iana_langs | 7 +
 src/atextcrawler/assets/iso_639-1 | 219 +
 src/atextcrawler/assets/top_1e4 | 10000 ++++++++++++++++
 src/atextcrawler/config.py | 337 +
 src/atextcrawler/crawl.py | 215 +
 src/atextcrawler/db.py | 162 +
 src/atextcrawler/migrations/1.sql | 297 +
 src/atextcrawler/models.py | 610 +
 src/atextcrawler/plugin_defaults/__init__.py | 0
 .../plugin_defaults/filter_resource_path.py | 22 +
 .../plugin_defaults/filter_site.py | 47 +
 .../plugin_defaults/filter_site_path.py | 24 +
 src/atextcrawler/resource/__init__.py | 10 +
 src/atextcrawler/resource/__main__.py | 96 +
 src/atextcrawler/resource/dedup.py | 59 +
 src/atextcrawler/resource/document.py | 131 +
 src/atextcrawler/resource/feed.py | 155 +
 src/atextcrawler/resource/fetch.py | 327 +
 src/atextcrawler/resource/operations.py | 347 +
 src/atextcrawler/resource/page.py | 355 +
 src/atextcrawler/resource/plaintext.py | 148 +
 src/atextcrawler/resource/sitemap.py | 149 +
 src/atextcrawler/search/__init__.py | 6 +
 src/atextcrawler/search/engine.py | 270 +
 src/atextcrawler/site/__init__.py | 9 +
 src/atextcrawler/site/__main__.py | 68 +
 src/atextcrawler/site/feeds.py | 100 +
 src/atextcrawler/site/operations.py | 267 +
 src/atextcrawler/site/parse.py | 255 +
 src/atextcrawler/site/queue.py | 127 +
 src/atextcrawler/site/robots.py | 98 +
 src/atextcrawler/site/seed.py | 72 +
 src/atextcrawler/tensorflow.py | 69 +
 src/atextcrawler/utils/__init__.py | 0
 src/atextcrawler/utils/annotation.py | 481 +
 src/atextcrawler/utils/date_finder.py | 90 +
 src/atextcrawler/utils/durl.py | 278 +
 src/atextcrawler/utils/html.py | 136 +
 src/atextcrawler/utils/http.py | 58 +
 src/atextcrawler/utils/json.py | 32 +
 src/atextcrawler/utils/lang.py | 44 +
 src/atextcrawler/utils/link.py | 116 +
 src/atextcrawler/utils/muse.py | 120 +
 src/atextcrawler/utils/probe.py | 22 +
 src/atextcrawler/utils/section.py | 74 +
 src/atextcrawler/utils/similarity.py | 92 +
 src/atextcrawler/utils/tag.py | 189 +
 tests/__init__.py | 7 +
 tests/annotation.py | 49 +
 tests/date_finder.py | 20 +
 tests/durl.py | 68 +
 tests/page.py | 24 +
 tests/section.py | 105 +
 tests/simhash.py | 54 +
 tests/text.py | 65 +
 83 files changed, 20130 insertions(+)
 create mode 100644 .gitignore
 create mode 100644 .pre-commit-config.yaml
 create mode 100644 Pipfile
 create mode 100644 Pipfile.lock
 create mode 100644 README.md
 create mode 100644 doc/Makefile
 create mode 100644 doc/source/conf.py
 create mode 100644 doc/source/config_template/initial_data/seed_urls.list
 create mode 100644 doc/source/config_template/main.yaml
 create mode 100644 doc/source/config_template/plugins/__init__.py
 create mode 100644 doc/source/config_template/plugins/filter_resource_path.py
 create mode 100644 doc/source/config_template/plugins/filter_site.py
 create mode 100644 doc/source/config_template/plugins/filter_site_path.py
 create mode 100644 doc/source/devel/devel.md
 create mode 100644 doc/source/devel/related_work.md
 create mode 100644 doc/source/devel/todo.md
 create mode 100644 doc/source/development.rst
 create mode 100644 doc/source/elasticsearch.md
 create mode 100644 doc/source/index.rst
 create mode 100644 doc/source/installation.md
 create mode 100644 doc/source/introduction.md
 create mode 100644 doc/source/maintenance.md
 create mode 100644 doc/source/tensorflow_model_server.md
 create mode 100644 license.txt
 create mode 100644 pyproject.toml
 create mode 100644 src/atextcrawler/__init__.py
 create mode 100644 src/atextcrawler/__main__.py
 create mode 100644 src/atextcrawler/application.py
 create mode 100644 src/atextcrawler/assets/iana_langs
 create mode 100644 src/atextcrawler/assets/iso_639-1
 create mode 100644 src/atextcrawler/assets/top_1e4
 create mode 100644 src/atextcrawler/config.py
 create mode 100644 src/atextcrawler/crawl.py
 create mode 100644 src/atextcrawler/db.py
 create mode 100644 src/atextcrawler/migrations/1.sql
 create mode 100644 src/atextcrawler/models.py
 create mode 100644 src/atextcrawler/plugin_defaults/__init__.py
 create mode 100644 src/atextcrawler/plugin_defaults/filter_resource_path.py
 create mode 100644 src/atextcrawler/plugin_defaults/filter_site.py
 create mode 100644 src/atextcrawler/plugin_defaults/filter_site_path.py
 create mode 100644 src/atextcrawler/resource/__init__.py
 create mode 100644 src/atextcrawler/resource/__main__.py
 create mode 100644 src/atextcrawler/resource/dedup.py
 create mode 100644 src/atextcrawler/resource/document.py
 create mode 100644 src/atextcrawler/resource/feed.py
 create mode 100644 src/atextcrawler/resource/fetch.py
 create mode 100644 src/atextcrawler/resource/operations.py
 create mode 100644 src/atextcrawler/resource/page.py
 create mode 100644 src/atextcrawler/resource/plaintext.py
 create mode 100644 src/atextcrawler/resource/sitemap.py
 create mode 100644 src/atextcrawler/search/__init__.py
 create mode 100644 src/atextcrawler/search/engine.py
 create mode 100644 src/atextcrawler/site/__init__.py
 create mode 100644 src/atextcrawler/site/__main__.py
 create mode 100644 src/atextcrawler/site/feeds.py
 create mode 100644 src/atextcrawler/site/operations.py
 create mode 100644 src/atextcrawler/site/parse.py
 create mode 100644 src/atextcrawler/site/queue.py
 create mode 100644 src/atextcrawler/site/robots.py
 create mode 100644 src/atextcrawler/site/seed.py
 create mode 100644 src/atextcrawler/tensorflow.py
 create mode 100644 src/atextcrawler/utils/__init__.py
 create mode 100644 src/atextcrawler/utils/annotation.py
 create mode 100644 src/atextcrawler/utils/date_finder.py
 create mode 100644 src/atextcrawler/utils/durl.py
 create mode 100644 src/atextcrawler/utils/html.py
 create mode 100644 src/atextcrawler/utils/http.py
 create mode 100644 src/atextcrawler/utils/json.py
 create mode 100644 src/atextcrawler/utils/lang.py
 create mode 100644 src/atextcrawler/utils/link.py
 create mode 100644 src/atextcrawler/utils/muse.py
 create mode 100644 src/atextcrawler/utils/probe.py
 create mode 100644 src/atextcrawler/utils/section.py
 create mode 100644 src/atextcrawler/utils/similarity.py
 create mode 100644 src/atextcrawler/utils/tag.py
 create mode 100644 tests/__init__.py
 create mode 100644 tests/annotation.py
 create mode 100644 tests/date_finder.py
 create mode 100644 tests/durl.py
 create mode 100644 tests/page.py
 create mode 100644 tests/section.py
 create mode 100644 tests/simhash.py
 create mode 100644 tests/text.py

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..b9d2de6
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,51 @@
+# Backup files
+*.~
+
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+
+# C extensions
+*.so
+
+# Distribution / packaging
+bin/
+build/
+develop-eggs/
+dist/
+eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+NOTES
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+.tox/
+.coverage
+.cache
+nosetests.xml
+coverage.xml
+htmlcov
+
+# Translations
+*.mo
+
+# mypy cache
+.mypy_cache
+
+# Sphinx documentation
+doc/build/
+doc/source/reference/
+
+# tmp dir
+tmp/
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
new file mode 100644
index 0000000..adf1b1a
--- /dev/null
+++ b/.pre-commit-config.yaml
@@ -0,0 +1,30 @@
+# See https://pre-commit.com for more information
+# See https://pre-commit.com/hooks.html for more hooks
+repos:
+- repo: https://github.com/pre-commit/pre-commit-hooks
+  rev: v4.0.1
+  hooks:
+  - id: trailing-whitespace
+  - id: end-of-file-fixer
+  - id: check-yaml
+  - id: check-added-large-files
+- repo: https://github.com/psf/black
+  rev: 21.11b1
+  hooks:
+  - id: black
+- repo: https://github.com/timothycrosley/isort
+  rev: 5.10.1
+  hooks:
+  - id: isort
+    args: ["--profile", "black", "--filter-files", "-l", "79"]
+- repo: https://github.com/myint/autoflake
+  rev: v1.4
+  hooks:
+  - id: autoflake
+    args:
+      [
+        "--in-place",
+        "--remove-all-unused-imports",
+        "--ignore-init-module-imports",
+        "--remove-unused-variables",
+      ]
diff --git a/Pipfile b/Pipfile
new file mode 100644
index 0000000..eed7f14
--- /dev/null
+++ b/Pipfile
@@ -0,0 +1,46 @@
+[[source]]
+url = "https://pypi.org/simple"
+verify_ssl = true
+name = "pypi"
+
+[packages]
+aiohttp = "*"
+async-lru = "*"
+asyncpg = "*"
+beautifulsoup4 = "*"
+elasticsearch = { version = ">=7.0.0,<8.0.0", extras = ['async'] }
+elasticsearch-dsl = { version = ">=7.0.0,<8.0.0" }
+feedparser = "*"
+gcld3 = "*"
+# TODO: recheck
+pypandoc = "*"
+pytidylib = "*"
+pytz = "*"
+pyyaml = "*"
+tika = "*"
+tldextract = "*"
+voluptuous = "*"
+simhash = "*"
+async-dns = "*"
+types-pyyaml = "*"
+sphinx-rtd-theme = "*"
+
+[dev-packages]
+mypy = "*"
+pre-commit = "*"
+sphinx = "*"
+myst-parser = "*"
+isort = "*"
+blacken-docs = "*"
+pybetter = "*"
+interrogate = "*"
+autoflake = "*"
+types-pyyaml = "*"
+types-pytz = "*"
+black = "*"
+
+[requires]
+python_version = "3.9"
+
+[pipenv]
+allow_prereleases = true
diff --git a/Pipfile.lock b/Pipfile.lock
new file mode 100644
index 0000000..58e2e74
--- /dev/null
+++ b/Pipfile.lock
@@ -0,0 +1,1561 @@
+{
+    "_meta": {
+        "hash": {
+            "sha256": "df63c76f1b8b031337d671aade6cc91f9add2205a75dbbd2770fa14e9430be55"
+        },
+        "pipfile-spec": 6,
+        "requires": {
+            "python_version": "3.9"
+        },
+        "sources": [
+            {
+                "name": "pypi",
+                "url": "https://pypi.org/simple",
+                "verify_ssl": true
+            }
+        ]
+    },
+    "default": {
+        "aiohttp": {
+            "hashes": [
+                "sha256:01d7bdb774a9acc838e6b8f1d114f45303841b89b95984cbb7d80ea41172a9e3",
+                "sha256:03a6d5349c9ee8f79ab3ff3694d6ce1cfc3ced1c9d36200cb8f08ba06bd3b782",
"sha256:04d48b8ce6ab3cf2097b1855e1505181bdd05586ca275f2505514a6e274e8e75", + "sha256:0770e2806a30e744b4e21c9d73b7bee18a1cfa3c47991ee2e5a65b887c49d5cf", + "sha256:07b05cd3305e8a73112103c834e91cd27ce5b4bd07850c4b4dbd1877d3f45be7", + "sha256:086f92daf51a032d062ec5f58af5ca6a44d082c35299c96376a41cbb33034675", + "sha256:099ebd2c37ac74cce10a3527d2b49af80243e2a4fa39e7bce41617fbc35fa3c1", + "sha256:0c7ebbbde809ff4e970824b2b6cb7e4222be6b95a296e46c03cf050878fc1785", + "sha256:102e487eeb82afac440581e5d7f8f44560b36cf0bdd11abc51a46c1cd88914d4", + "sha256:11691cf4dc5b94236ccc609b70fec991234e7ef8d4c02dd0c9668d1e486f5abf", + "sha256:11a67c0d562e07067c4e86bffc1553f2cf5b664d6111c894671b2b8712f3aba5", + "sha256:12de6add4038df8f72fac606dff775791a60f113a725c960f2bab01d8b8e6b15", + "sha256:13487abd2f761d4be7c8ff9080de2671e53fff69711d46de703c310c4c9317ca", + "sha256:15b09b06dae900777833fe7fc4b4aa426556ce95847a3e8d7548e2d19e34edb8", + "sha256:1c182cb873bc91b411e184dab7a2b664d4fea2743df0e4d57402f7f3fa644bac", + "sha256:1ed0b6477896559f17b9eaeb6d38e07f7f9ffe40b9f0f9627ae8b9926ae260a8", + "sha256:28d490af82bc6b7ce53ff31337a18a10498303fe66f701ab65ef27e143c3b0ef", + "sha256:2e5d962cf7e1d426aa0e528a7e198658cdc8aa4fe87f781d039ad75dcd52c516", + "sha256:2ed076098b171573161eb146afcb9129b5ff63308960aeca4b676d9d3c35e700", + "sha256:2f2f69dca064926e79997f45b2f34e202b320fd3782f17a91941f7eb85502ee2", + "sha256:31560d268ff62143e92423ef183680b9829b1b482c011713ae941997921eebc8", + "sha256:31d1e1c0dbf19ebccbfd62eff461518dcb1e307b195e93bba60c965a4dcf1ba0", + "sha256:37951ad2f4a6df6506750a23f7cbabad24c73c65f23f72e95897bb2cecbae676", + "sha256:3af642b43ce56c24d063325dd2cf20ee012d2b9ba4c3c008755a301aaea720ad", + "sha256:44db35a9e15d6fe5c40d74952e803b1d96e964f683b5a78c3cc64eb177878155", + "sha256:473d93d4450880fe278696549f2e7aed8cd23708c3c1997981464475f32137db", + "sha256:477c3ea0ba410b2b56b7efb072c36fa91b1e6fc331761798fa3f28bb224830dd", + "sha256:4a4a4e30bf1edcad13fb0804300557aedd07a92cabc74382fdd0ba6ca2661091", + "sha256:4aed991a28ea3ce320dc8ce655875e1e00a11bdd29fe9444dd4f88c30d558602", + "sha256:51467000f3647d519272392f484126aa716f747859794ac9924a7aafa86cd411", + "sha256:55c3d1072704d27401c92339144d199d9de7b52627f724a949fc7d5fc56d8b93", + "sha256:589c72667a5febd36f1315aa6e5f56dd4aa4862df295cb51c769d16142ddd7cd", + "sha256:5bfde62d1d2641a1f5173b8c8c2d96ceb4854f54a44c23102e2ccc7e02f003ec", + "sha256:5c23b1ad869653bc818e972b7a3a79852d0e494e9ab7e1a701a3decc49c20d51", + "sha256:61bfc23df345d8c9716d03717c2ed5e27374e0fe6f659ea64edcd27b4b044cf7", + "sha256:6ae828d3a003f03ae31915c31fa684b9890ea44c9c989056fea96e3d12a9fa17", + "sha256:6c7cefb4b0640703eb1069835c02486669312bf2f12b48a748e0a7756d0de33d", + "sha256:6d69f36d445c45cda7b3b26afef2fc34ef5ac0cdc75584a87ef307ee3c8c6d00", + "sha256:6f0d5f33feb5f69ddd57a4a4bd3d56c719a141080b445cbf18f238973c5c9923", + "sha256:6f8b01295e26c68b3a1b90efb7a89029110d3a4139270b24fda961893216c440", + "sha256:713ac174a629d39b7c6a3aa757b337599798da4c1157114a314e4e391cd28e32", + "sha256:718626a174e7e467f0558954f94af117b7d4695d48eb980146016afa4b580b2e", + "sha256:7187a76598bdb895af0adbd2fb7474d7f6025d170bc0a1130242da817ce9e7d1", + "sha256:71927042ed6365a09a98a6377501af5c9f0a4d38083652bcd2281a06a5976724", + "sha256:7d08744e9bae2ca9c382581f7dce1273fe3c9bae94ff572c3626e8da5b193c6a", + "sha256:7dadf3c307b31e0e61689cbf9e06be7a867c563d5a63ce9dca578f956609abf8", + "sha256:81e3d8c34c623ca4e36c46524a3530e99c0bc95ed068fd6e9b55cb721d408fb2", + "sha256:844a9b460871ee0a0b0b68a64890dae9c415e513db0f4a7e3cab41a0f2fedf33", + 
"sha256:8b7ef7cbd4fec9a1e811a5de813311ed4f7ac7d93e0fda233c9b3e1428f7dd7b", + "sha256:97ef77eb6b044134c0b3a96e16abcb05ecce892965a2124c566af0fd60f717e2", + "sha256:99b5eeae8e019e7aad8af8bb314fb908dd2e028b3cdaad87ec05095394cce632", + "sha256:a25fa703a527158aaf10dafd956f7d42ac6d30ec80e9a70846253dd13e2f067b", + "sha256:a2f635ce61a89c5732537a7896b6319a8fcfa23ba09bec36e1b1ac0ab31270d2", + "sha256:a79004bb58748f31ae1cbe9fa891054baaa46fb106c2dc7af9f8e3304dc30316", + "sha256:a996d01ca39b8dfe77440f3cd600825d05841088fd6bc0144cc6c2ec14cc5f74", + "sha256:b0e20cddbd676ab8a64c774fefa0ad787cc506afd844de95da56060348021e96", + "sha256:b6613280ccedf24354406caf785db748bebbddcf31408b20c0b48cb86af76866", + "sha256:b9d00268fcb9f66fbcc7cd9fe423741d90c75ee029a1d15c09b22d23253c0a44", + "sha256:bb01ba6b0d3f6c68b89fce7305080145d4877ad3acaed424bae4d4ee75faa950", + "sha256:c2aef4703f1f2ddc6df17519885dbfa3514929149d3ff900b73f45998f2532fa", + "sha256:c34dc4958b232ef6188c4318cb7b2c2d80521c9a56c52449f8f93ab7bc2a8a1c", + "sha256:c3630c3ef435c0a7c549ba170a0633a56e92629aeed0e707fec832dee313fb7a", + "sha256:c3d6a4d0619e09dcd61021debf7059955c2004fa29f48788a3dfaf9c9901a7cd", + "sha256:d15367ce87c8e9e09b0f989bfd72dc641bcd04ba091c68cd305312d00962addd", + "sha256:d2f9b69293c33aaa53d923032fe227feac867f81682f002ce33ffae978f0a9a9", + "sha256:e999f2d0e12eea01caeecb17b653f3713d758f6dcc770417cf29ef08d3931421", + "sha256:ea302f34477fda3f85560a06d9ebdc7fa41e82420e892fc50b577e35fc6a50b2", + "sha256:eaba923151d9deea315be1f3e2b31cc39a6d1d2f682f942905951f4e40200922", + "sha256:ef9612483cb35171d51d9173647eed5d0069eaa2ee812793a75373447d487aa4", + "sha256:f5315a2eb0239185af1bddb1abf472d877fede3cc8d143c6cddad37678293237", + "sha256:fa0ffcace9b3aa34d205d8130f7873fcfefcb6a4dd3dd705b0dab69af6712642", + "sha256:fc5471e1a54de15ef71c1bc6ebe80d4dc681ea600e68bfd1cbce40427f0b7578" + ], + "index": "pypi", + "version": "==3.8.1" + }, + "aiosignal": { + "hashes": [ + "sha256:26e62109036cd181df6e6ad646f91f0dcfd05fe16d0cb924138ff2ab75d64e3a", + "sha256:78ed67db6c7b7ced4f98e495e572106d5c432a93e1ddd1bf475e1dc05f5b7df2" + ], + "markers": "python_version >= '3.6'", + "version": "==1.2.0" + }, + "alabaster": { + "hashes": [ + "sha256:446438bdcca0e05bd45ea2de1668c1d9b032e1a9154c2c259092d77031ddd359", + "sha256:a661d72d58e6ea8a57f7a86e37d86716863ee5e92788398526d58b26a4e4dc02" + ], + "version": "==0.7.12" + }, + "async-dns": { + "hashes": [ + "sha256:8536be11c3789b154472a86db9df5c2149d5466949c78071019bf5edccbb639e", + "sha256:a257e47cc64022f95d570a1cd7f5fe90c2d8546b24fbe1049c3980a9a5832b96" + ], + "index": "pypi", + "version": "==2.0.0" + }, + "async-lru": { + "hashes": [ + "sha256:baa898027619f5cc31b7966f96f00e4fc0df43ba206a8940a5d1af5336a477cb" + ], + "index": "pypi", + "version": "==1.0.2" + }, + "async-timeout": { + "hashes": [ + "sha256:a22c0b311af23337eb05fcf05a8b51c3ea53729d46fb5460af62bee033cec690", + "sha256:b930cb161a39042f9222f6efb7301399c87eeab394727ec5437924a36d6eef51" + ], + "markers": "python_version >= '3.6'", + "version": "==4.0.1" + }, + "asyncpg": { + "hashes": [ + "sha256:0a61fb196ce4dae2f2fa26eb20a778db21bbee484d2e798cb3cc988de13bdd1b", + "sha256:18d49e2d93a7139a2fdbd113e320cc47075049997268a61bfbe0dde680c55471", + "sha256:191fe6341385b7fdea7dbdcf47fd6db3fd198827dcc1f2b228476d13c05a03c6", + "sha256:1a70783f6ffa34cc7dd2de20a873181414a34fd35a4a208a1f1a7f9f695e4ec4", + "sha256:2633331cbc8429030b4f20f712f8d0fbba57fa8555ee9b2f45f981b81328b256", + "sha256:2bc197fc4aca2fd24f60241057998124012469d2e414aed3f992579db0c88e3a", + 
"sha256:4327f691b1bdb222df27841938b3e04c14068166b3a97491bec2cb982f49f03e", + "sha256:43cde84e996a3afe75f325a68300093425c2f47d340c0fc8912765cf24a1c095", + "sha256:52fab7f1b2c29e187dd8781fce896249500cf055b63471ad66332e537e9b5f7e", + "sha256:56d88d7ef4341412cd9c68efba323a4519c916979ba91b95d4c08799d2ff0c09", + "sha256:5e4105f57ad1e8fbc8b1e535d8fcefa6ce6c71081228f08680c6dea24384ff0e", + "sha256:63f8e6a69733b285497c2855464a34de657f2cccd25aeaeeb5071872e9382540", + "sha256:649e2966d98cc48d0646d9a4e29abecd8b59d38d55c256d5c857f6b27b7407ac", + "sha256:6f8f5fc975246eda83da8031a14004b9197f510c41511018e7b1bedde6968e92", + "sha256:72a1e12ea0cf7c1e02794b697e3ca967b2360eaa2ce5d4bfdd8604ec2d6b774b", + "sha256:739bbd7f89a2b2f6bc44cb8bf967dab12c5bc714fcbe96e68d512be45ecdf962", + "sha256:863d36eba4a7caa853fd7d83fad5fd5306f050cc2fe6e54fbe10cdb30420e5e9", + "sha256:a738f1b2876f30d710d3dc1e7858160a0afe1603ba16bf5f391f5316eb0ed855", + "sha256:a84d30e6f850bac0876990bcd207362778e2208df0bee8be8da9f1558255e634", + "sha256:acb311722352152936e58a8ee3c5b8e791b24e84cd7d777c414ff05b3530ca68", + "sha256:beaecc52ad39614f6ca2e48c3ca15d56e24a2c15cbfdcb764a4320cc45f02fd5", + "sha256:bf5e3408a14a17d480f36ebaf0401a12ff6ae5457fdf45e4e2775c51cc9517d3", + "sha256:bf6dc9b55b9113f39eaa2057337ce3f9ef7de99a053b8a16360395ce588925cd", + "sha256:ddb4c3263a8d63dcde3d2c4ac1c25206bfeb31fa83bd70fd539e10f87739dee4", + "sha256:f55918ded7b85723a5eaeb34e86e7b9280d4474be67df853ab5a7fa0cc7c6bf2", + "sha256:fe471ccd915b739ca65e2e4dbd92a11b44a5b37f2e38f70827a1c147dafe0fa8" + ], + "index": "pypi", + "version": "==0.25.0" + }, + "attrs": { + "hashes": [ + "sha256:149e90d6d8ac20db7a955ad60cf0e6881a3f20d37096140088356da6c716b0b1", + "sha256:ef6aaac3ca6cd92904cdd0d83f629a15f18053ec84e6432106f7a4d04ae4f5fb" + ], + "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4'", + "version": "==21.2.0" + }, + "babel": { + "hashes": [ + "sha256:ab49e12b91d937cd11f0b67cb259a57ab4ad2b59ac7a3b41d6c06c0ac5b0def9", + "sha256:bc0c176f9f6a994582230df350aa6e05ba2ebe4b3ac317eab29d9be5d2768da0" + ], + "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'", + "version": "==2.9.1" + }, + "beautifulsoup4": { + "hashes": [ + "sha256:9a315ce70049920ea4572a4055bc4bd700c940521d36fc858205ad4fcde149bf", + "sha256:c23ad23c521d818955a4151a67d81580319d4bf548d3d49f4223ae041ff98891" + ], + "index": "pypi", + "version": "==4.10.0" + }, + "certifi": { + "hashes": [ + "sha256:78884e7c1d4b00ce3cea67b44566851c4343c120abd683433ce934a68ea58872", + "sha256:d62a0163eb4c2344ac042ab2bdf75399a71a2d8c7d47eac2e2ee91b9d6339569" + ], + "version": "==2021.10.8" + }, + "charset-normalizer": { + "hashes": [ + "sha256:735e240d9a8506778cd7a453d97e817e536bb1fc29f4f6961ce297b9c7a917b0", + "sha256:83fcdeb225499d6344c8f7f34684c2981270beacc32ede2e669e94f7fa544405" + ], + "markers": "python_version >= '3'", + "version": "==2.0.8" + }, + "docutils": { + "hashes": [ + "sha256:686577d2e4c32380bb50cbb22f575ed742d58168cee37e99117a854bcd88f125", + "sha256:cf316c8370a737a022b72b56874f6602acf974a37a9fba42ec2876387549fc61" + ], + "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4'", + "version": "==0.17.1" + }, + "elasticsearch": { + "extras": [ + "async" + ], + "hashes": [ + "sha256:436f871848a5020bf9b47495812b229b59bd0c5d7e40adbd5e3c89896b311704", + "sha256:83c299a08fc8737c72454e6d3b2a01ba1b194e4f4d9e4f8bae7058cec326f39f" + ], + "index": "pypi", + "version": "==7.15.2" + }, + "elasticsearch-dsl": { + "hashes": [ + 
"sha256:046ea10820b94c075081b528b4526c5bc776bda4226d702f269a5f203232064b", + "sha256:c4a7b93882918a413b63bed54018a1685d7410ffd8facbc860ee7fd57f214a6d" + ], + "index": "pypi", + "version": "==7.4.0" + }, + "feedparser": { + "hashes": [ + "sha256:1b7f57841d9cf85074deb316ed2c795091a238adb79846bc46dccdaf80f9c59a", + "sha256:5ce0410a05ab248c8c7cfca3a0ea2203968ee9ff4486067379af4827a59f9661" + ], + "index": "pypi", + "version": "==6.0.8" + }, + "filelock": { + "hashes": [ + "sha256:2e139a228bcf56dd8b2274a65174d005c4a6b68540ee0bdbb92c76f43f29f7e8", + "sha256:93d512b32a23baf4cac44ffd72ccf70732aeff7b8050fcaf6d3ec406d954baf4" + ], + "markers": "python_version >= '3.6'", + "version": "==3.4.0" + }, + "frozenlist": { + "hashes": [ + "sha256:01d79515ed5aa3d699b05f6bdcf1fe9087d61d6b53882aa599a10853f0479c6c", + "sha256:0a7c7cce70e41bc13d7d50f0e5dd175f14a4f1837a8549b0936ed0cbe6170bf9", + "sha256:11ff401951b5ac8c0701a804f503d72c048173208490c54ebb8d7bb7c07a6d00", + "sha256:14a5cef795ae3e28fb504b73e797c1800e9249f950e1c964bb6bdc8d77871161", + "sha256:16eef427c51cb1203a7c0ab59d1b8abccaba9a4f58c4bfca6ed278fc896dc193", + "sha256:16ef7dd5b7d17495404a2e7a49bac1bc13d6d20c16d11f4133c757dd94c4144c", + "sha256:181754275d5d32487431a0a29add4f897968b7157204bc1eaaf0a0ce80c5ba7d", + "sha256:1cf63243bc5f5c19762943b0aa9e0d3fb3723d0c514d820a18a9b9a5ef864315", + "sha256:1cfe6fef507f8bac40f009c85c7eddfed88c1c0d38c75e72fe10476cef94e10f", + "sha256:1fef737fd1388f9b93bba8808c5f63058113c10f4e3c0763ced68431773f72f9", + "sha256:25b358aaa7dba5891b05968dd539f5856d69f522b6de0bf34e61f133e077c1a4", + "sha256:26f602e380a5132880fa245c92030abb0fc6ff34e0c5500600366cedc6adb06a", + "sha256:28e164722ea0df0cf6d48c4d5bdf3d19e87aaa6dfb39b0ba91153f224b912020", + "sha256:2de5b931701257d50771a032bba4e448ff958076380b049fd36ed8738fdb375b", + "sha256:3457f8cf86deb6ce1ba67e120f1b0128fcba1332a180722756597253c465fc1d", + "sha256:351686ca020d1bcd238596b1fa5c8efcbc21bffda9d0efe237aaa60348421e2a", + "sha256:406aeb340613b4b559db78d86864485f68919b7141dec82aba24d1477fd2976f", + "sha256:41de4db9b9501679cf7cddc16d07ac0f10ef7eb58c525a1c8cbff43022bddca4", + "sha256:41f62468af1bd4e4b42b5508a3fe8cc46a693f0cdd0ca2f443f51f207893d837", + "sha256:4766632cd8a68e4f10f156a12c9acd7b1609941525569dd3636d859d79279ed3", + "sha256:47b2848e464883d0bbdcd9493c67443e5e695a84694efff0476f9059b4cb6257", + "sha256:4a495c3d513573b0b3f935bfa887a85d9ae09f0627cf47cad17d0cc9b9ba5c38", + "sha256:4ad065b2ebd09f32511ff2be35c5dfafee6192978b5a1e9d279a5c6e121e3b03", + "sha256:4c457220468d734e3077580a3642b7f682f5fd9507f17ddf1029452450912cdc", + "sha256:4f52d0732e56906f8ddea4bd856192984650282424049c956857fed43697ea43", + "sha256:54a1e09ab7a69f843cd28fefd2bcaf23edb9e3a8d7680032c8968b8ac934587d", + "sha256:5a72eecf37eface331636951249d878750db84034927c997d47f7f78a573b72b", + "sha256:5df31bb2b974f379d230a25943d9bf0d3bc666b4b0807394b131a28fca2b0e5f", + "sha256:66a518731a21a55b7d3e087b430f1956a36793acc15912e2878431c7aec54210", + "sha256:6790b8d96bbb74b7a6f4594b6f131bd23056c25f2aa5d816bd177d95245a30e3", + "sha256:68201be60ac56aff972dc18085800b6ee07973c49103a8aba669dee3d71079de", + "sha256:6e105013fa84623c057a4381dc8ea0361f4d682c11f3816cc80f49a1f3bc17c6", + "sha256:705c184b77565955a99dc360f359e8249580c6b7eaa4dc0227caa861ef46b27a", + "sha256:72cfbeab7a920ea9e74b19aa0afe3b4ad9c89471e3badc985d08756efa9b813b", + "sha256:735f386ec522e384f511614c01d2ef9cf799f051353876b4c6fb93ef67a6d1ee", + "sha256:82d22f6e6f2916e837c91c860140ef9947e31194c82aaeda843d6551cec92f19", + 
"sha256:83334e84a290a158c0c4cc4d22e8c7cfe0bba5b76d37f1c2509dabd22acafe15", + "sha256:84e97f59211b5b9083a2e7a45abf91cfb441369e8bb6d1f5287382c1c526def3", + "sha256:87521e32e18a2223311afc2492ef2d99946337da0779ddcda77b82ee7319df59", + "sha256:878ebe074839d649a1cdb03a61077d05760624f36d196884a5cafb12290e187b", + "sha256:89fdfc84c6bf0bff2ff3170bb34ecba8a6911b260d318d377171429c4be18c73", + "sha256:8b4c7665a17c3a5430edb663e4ad4e1ad457614d1b2f2b7f87052e2ef4fa45ca", + "sha256:8b54cdd2fda15467b9b0bfa78cee2ddf6dbb4585ef23a16e14926f4b076dfae4", + "sha256:94728f97ddf603d23c8c3dd5cae2644fa12d33116e69f49b1644a71bb77b89ae", + "sha256:954b154a4533ef28bd3e83ffdf4eadf39deeda9e38fb8feaf066d6069885e034", + "sha256:977a1438d0e0d96573fd679d291a1542097ea9f4918a8b6494b06610dfeefbf9", + "sha256:9ade70aea559ca98f4b1b1e5650c45678052e76a8ab2f76d90f2ac64180215a2", + "sha256:9b6e21e5770df2dea06cb7b6323fbc008b13c4a4e3b52cb54685276479ee7676", + "sha256:a0d3ffa8772464441b52489b985d46001e2853a3b082c655ec5fad9fb6a3d618", + "sha256:a37594ad6356e50073fe4f60aa4187b97d15329f2138124d252a5a19c8553ea4", + "sha256:a8d86547a5e98d9edd47c432f7a14b0c5592624b496ae9880fb6332f34af1edc", + "sha256:aa44c4740b4e23fcfa259e9dd52315d2b1770064cde9507457e4c4a65a04c397", + "sha256:acc4614e8d1feb9f46dd829a8e771b8f5c4b1051365d02efb27a3229048ade8a", + "sha256:af2a51c8a381d76eabb76f228f565ed4c3701441ecec101dd18be70ebd483cfd", + "sha256:b2ae2f5e9fa10805fb1c9adbfefaaecedd9e31849434be462c3960a0139ed729", + "sha256:b46f997d5ed6d222a863b02cdc9c299101ee27974d9bbb2fd1b3c8441311c408", + "sha256:bc93f5f62df3bdc1f677066327fc81f92b83644852a31c6aa9b32c2dde86ea7d", + "sha256:bfbaa08cf1452acad9cb1c1d7b89394a41e712f88df522cea1a0f296b57782a0", + "sha256:c1e8e9033d34c2c9e186e58279879d78c94dd365068a3607af33f2bc99357a53", + "sha256:c5328ed53fdb0a73c8a50105306a3bc013e5ca36cca714ec4f7bd31d38d8a97f", + "sha256:c6a9d84ee6427b65a81fc24e6ef589cb794009f5ca4150151251c062773e7ed2", + "sha256:c98d3c04701773ad60d9545cd96df94d955329efc7743fdb96422c4b669c633b", + "sha256:cb3957c39668d10e2b486acc85f94153520a23263b6401e8f59422ef65b9520d", + "sha256:e63ad0beef6ece06475d29f47d1f2f29727805376e09850ebf64f90777962792", + "sha256:e74f8b4d8677ebb4015ac01fcaf05f34e8a1f22775db1f304f497f2f88fdc697", + "sha256:e7d0dd3e727c70c2680f5f09a0775525229809f1a35d8552b92ff10b2b14f2c2", + "sha256:ec6cf345771cdb00791d271af9a0a6fbfc2b6dd44cb753f1eeaa256e21622adb", + "sha256:ed58803563a8c87cf4c0771366cf0ad1aa265b6b0ae54cbbb53013480c7ad74d", + "sha256:f0081a623c886197ff8de9e635528fd7e6a387dccef432149e25c13946cb0cd0", + "sha256:f025f1d6825725b09c0038775acab9ae94264453a696cc797ce20c0769a7b367", + "sha256:f5f3b2942c3b8b9bfe76b408bbaba3d3bb305ee3693e8b1d631fe0a0d4f93673", + "sha256:fbd4844ff111449f3bbe20ba24fbb906b5b1c2384d0f3287c9f7da2354ce6d23" + ], + "markers": "python_version >= '3.6'", + "version": "==1.2.0" + }, + "gcld3": { + "hashes": [ + "sha256:11a127e493c2952a83a957e2f025d6ff8b1d2efd353baa25bd99d7ceb3c96c54", + "sha256:47c8c779bfe7372a38564b0cd357556dc362aec81cb55b0c889059e8b952e959", + "sha256:4fc4ae1c8c7baab21a46fc66074787d010ca338f5c4b2ff80dd2448d18cc3d89", + "sha256:51538b26dd1741f49ceb03421710d4355c6ce3fd257a2ddebe868e36458f5d45", + "sha256:b56a9852861c7209434917885b8331ddf5f11d3d5810baafb9a29ffd5ccd35d1", + "sha256:c6bc0ecf3d95943a2b9fe61214aab8284a3b33f25e07c6caad064ea59417620f", + "sha256:fb745958278bfcc230fe3e15af3babb2752e77afd929ae0f95b9ef52648f3271" + ], + "index": "pypi", + "version": "==3.0.13" + }, + "idna": { + "hashes": [ + 
"sha256:84d9dd047ffa80596e0f246e2eab0b391788b0503584e8945f2368256d2735ff", + "sha256:9d643ff0a55b762d5cdb124b8eaa99c66322e2157b69160bc32796e824360e6d" + ], + "markers": "python_version >= '3'", + "version": "==3.3" + }, + "imagesize": { + "hashes": [ + "sha256:1db2f82529e53c3e929e8926a1fa9235aa82d0bd0c580359c67ec31b2fddaa8c", + "sha256:cd1750d452385ca327479d45b64d9c7729ecf0b3969a58148298c77092261f9d" + ], + "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'", + "version": "==1.3.0" + }, + "jinja2": { + "hashes": [ + "sha256:077ce6014f7b40d03b47d1f1ca4b0fc8328a692bd284016f806ed0eaca390ad8", + "sha256:611bb273cd68f3b993fabdc4064fc858c5b47a973cb5aa7999ec1ba405c87cd7" + ], + "markers": "python_version >= '3.6'", + "version": "==3.0.3" + }, + "markupsafe": { + "hashes": [ + "sha256:01a9b8ea66f1658938f65b93a85ebe8bc016e6769611be228d797c9d998dd298", + "sha256:023cb26ec21ece8dc3907c0e8320058b2e0cb3c55cf9564da612bc325bed5e64", + "sha256:0446679737af14f45767963a1a9ef7620189912317d095f2d9ffa183a4d25d2b", + "sha256:04635854b943835a6ea959e948d19dcd311762c5c0c6e1f0e16ee57022669194", + "sha256:0717a7390a68be14b8c793ba258e075c6f4ca819f15edfc2a3a027c823718567", + "sha256:0955295dd5eec6cb6cc2fe1698f4c6d84af2e92de33fbcac4111913cd100a6ff", + "sha256:0d4b31cc67ab36e3392bbf3862cfbadac3db12bdd8b02a2731f509ed5b829724", + "sha256:10f82115e21dc0dfec9ab5c0223652f7197feb168c940f3ef61563fc2d6beb74", + "sha256:168cd0a3642de83558a5153c8bd34f175a9a6e7f6dc6384b9655d2697312a646", + "sha256:1d609f577dc6e1aa17d746f8bd3c31aa4d258f4070d61b2aa5c4166c1539de35", + "sha256:1f2ade76b9903f39aa442b4aadd2177decb66525062db244b35d71d0ee8599b6", + "sha256:20dca64a3ef2d6e4d5d615a3fd418ad3bde77a47ec8a23d984a12b5b4c74491a", + "sha256:2a7d351cbd8cfeb19ca00de495e224dea7e7d919659c2841bbb7f420ad03e2d6", + "sha256:2d7d807855b419fc2ed3e631034685db6079889a1f01d5d9dac950f764da3dad", + "sha256:2ef54abee730b502252bcdf31b10dacb0a416229b72c18b19e24a4509f273d26", + "sha256:36bc903cbb393720fad60fc28c10de6acf10dc6cc883f3e24ee4012371399a38", + "sha256:37205cac2a79194e3750b0af2a5720d95f786a55ce7df90c3af697bfa100eaac", + "sha256:3c112550557578c26af18a1ccc9e090bfe03832ae994343cfdacd287db6a6ae7", + "sha256:3dd007d54ee88b46be476e293f48c85048603f5f516008bee124ddd891398ed6", + "sha256:4296f2b1ce8c86a6aea78613c34bb1a672ea0e3de9c6ba08a960efe0b0a09047", + "sha256:47ab1e7b91c098ab893b828deafa1203de86d0bc6ab587b160f78fe6c4011f75", + "sha256:49e3ceeabbfb9d66c3aef5af3a60cc43b85c33df25ce03d0031a608b0a8b2e3f", + "sha256:4dc8f9fb58f7364b63fd9f85013b780ef83c11857ae79f2feda41e270468dd9b", + "sha256:4efca8f86c54b22348a5467704e3fec767b2db12fc39c6d963168ab1d3fc9135", + "sha256:53edb4da6925ad13c07b6d26c2a852bd81e364f95301c66e930ab2aef5b5ddd8", + "sha256:5855f8438a7d1d458206a2466bf82b0f104a3724bf96a1c781ab731e4201731a", + "sha256:594c67807fb16238b30c44bdf74f36c02cdf22d1c8cda91ef8a0ed8dabf5620a", + "sha256:5b6d930f030f8ed98e3e6c98ffa0652bdb82601e7a016ec2ab5d7ff23baa78d1", + "sha256:5bb28c636d87e840583ee3adeb78172efc47c8b26127267f54a9c0ec251d41a9", + "sha256:60bf42e36abfaf9aff1f50f52644b336d4f0a3fd6d8a60ca0d054ac9f713a864", + "sha256:611d1ad9a4288cf3e3c16014564df047fe08410e628f89805e475368bd304914", + "sha256:6300b8454aa6930a24b9618fbb54b5a68135092bc666f7b06901f897fa5c2fee", + "sha256:63f3268ba69ace99cab4e3e3b5840b03340efed0948ab8f78d2fd87ee5442a4f", + "sha256:6557b31b5e2c9ddf0de32a691f2312a32f77cd7681d8af66c2692efdbef84c18", + "sha256:693ce3f9e70a6cf7d2fb9e6c9d8b204b6b39897a2c4a1aa65728d5ac97dcc1d8", + 
"sha256:6a7fae0dd14cf60ad5ff42baa2e95727c3d81ded453457771d02b7d2b3f9c0c2", + "sha256:6c4ca60fa24e85fe25b912b01e62cb969d69a23a5d5867682dd3e80b5b02581d", + "sha256:6fcf051089389abe060c9cd7caa212c707e58153afa2c649f00346ce6d260f1b", + "sha256:7d91275b0245b1da4d4cfa07e0faedd5b0812efc15b702576d103293e252af1b", + "sha256:89c687013cb1cd489a0f0ac24febe8c7a666e6e221b783e53ac50ebf68e45d86", + "sha256:8d206346619592c6200148b01a2142798c989edcb9c896f9ac9722a99d4e77e6", + "sha256:905fec760bd2fa1388bb5b489ee8ee5f7291d692638ea5f67982d968366bef9f", + "sha256:97383d78eb34da7e1fa37dd273c20ad4320929af65d156e35a5e2d89566d9dfb", + "sha256:984d76483eb32f1bcb536dc27e4ad56bba4baa70be32fa87152832cdd9db0833", + "sha256:99df47edb6bda1249d3e80fdabb1dab8c08ef3975f69aed437cb69d0a5de1e28", + "sha256:9f02365d4e99430a12647f09b6cc8bab61a6564363f313126f775eb4f6ef798e", + "sha256:a30e67a65b53ea0a5e62fe23682cfe22712e01f453b95233b25502f7c61cb415", + "sha256:ab3ef638ace319fa26553db0624c4699e31a28bb2a835c5faca8f8acf6a5a902", + "sha256:aca6377c0cb8a8253e493c6b451565ac77e98c2951c45f913e0b52facdcff83f", + "sha256:add36cb2dbb8b736611303cd3bfcee00afd96471b09cda130da3581cbdc56a6d", + "sha256:b2f4bf27480f5e5e8ce285a8c8fd176c0b03e93dcc6646477d4630e83440c6a9", + "sha256:b7f2d075102dc8c794cbde1947378051c4e5180d52d276987b8d28a3bd58c17d", + "sha256:baa1a4e8f868845af802979fcdbf0bb11f94f1cb7ced4c4b8a351bb60d108145", + "sha256:be98f628055368795d818ebf93da628541e10b75b41c559fdf36d104c5787066", + "sha256:bf5d821ffabf0ef3533c39c518f3357b171a1651c1ff6827325e4489b0e46c3c", + "sha256:c47adbc92fc1bb2b3274c4b3a43ae0e4573d9fbff4f54cd484555edbf030baf1", + "sha256:cdfba22ea2f0029c9261a4bd07e830a8da012291fbe44dc794e488b6c9bb353a", + "sha256:d6c7ebd4e944c85e2c3421e612a7057a2f48d478d79e61800d81468a8d842207", + "sha256:d7f9850398e85aba693bb640262d3611788b1f29a79f0c93c565694658f4071f", + "sha256:d8446c54dc28c01e5a2dbac5a25f071f6653e6e40f3a8818e8b45d790fe6ef53", + "sha256:deb993cacb280823246a026e3b2d81c493c53de6acfd5e6bfe31ab3402bb37dd", + "sha256:e0f138900af21926a02425cf736db95be9f4af72ba1bb21453432a07f6082134", + "sha256:e9936f0b261d4df76ad22f8fee3ae83b60d7c3e871292cd42f40b81b70afae85", + "sha256:f0567c4dc99f264f49fe27da5f735f414c4e7e7dd850cfd8e69f0862d7c74ea9", + "sha256:f5653a225f31e113b152e56f154ccbe59eeb1c7487b39b9d9f9cdb58e6c79dc5", + "sha256:f826e31d18b516f653fe296d967d700fddad5901ae07c622bb3705955e1faa94", + "sha256:f8ba0e8349a38d3001fae7eadded3f6606f0da5d748ee53cc1dab1d6527b9509", + "sha256:f9081981fe268bd86831e5c75f7de206ef275defcb82bc70740ae6dc507aee51", + "sha256:fa130dd50c57d53368c9d59395cb5526eda596d3ffe36666cd81a44d56e48872" + ], + "markers": "python_version >= '3.6'", + "version": "==2.0.1" + }, + "multidict": { + "hashes": [ + "sha256:06560fbdcf22c9387100979e65b26fba0816c162b888cb65b845d3def7a54c9b", + "sha256:067150fad08e6f2dd91a650c7a49ba65085303fcc3decbd64a57dc13a2733031", + "sha256:0a2cbcfbea6dc776782a444db819c8b78afe4db597211298dd8b2222f73e9cd0", + "sha256:0dd1c93edb444b33ba2274b66f63def8a327d607c6c790772f448a53b6ea59ce", + "sha256:0fed465af2e0eb6357ba95795d003ac0bdb546305cc2366b1fc8f0ad67cc3fda", + "sha256:116347c63ba049c1ea56e157fa8aa6edaf5e92925c9b64f3da7769bdfa012858", + "sha256:1b4ac3ba7a97b35a5ccf34f41b5a8642a01d1e55454b699e5e8e7a99b5a3acf5", + "sha256:1c7976cd1c157fa7ba5456ae5d31ccdf1479680dc9b8d8aa28afabc370df42b8", + "sha256:246145bff76cc4b19310f0ad28bd0769b940c2a49fc601b86bfd150cbd72bb22", + "sha256:25cbd39a9029b409167aa0a20d8a17f502d43f2efebfe9e3ac019fe6796c59ac", + 
"sha256:28e6d883acd8674887d7edc896b91751dc2d8e87fbdca8359591a13872799e4e", + "sha256:2d1d55cdf706ddc62822d394d1df53573d32a7a07d4f099470d3cb9323b721b6", + "sha256:2e77282fd1d677c313ffcaddfec236bf23f273c4fba7cdf198108f5940ae10f5", + "sha256:32fdba7333eb2351fee2596b756d730d62b5827d5e1ab2f84e6cbb287cc67fe0", + "sha256:35591729668a303a02b06e8dba0eb8140c4a1bfd4c4b3209a436a02a5ac1de11", + "sha256:380b868f55f63d048a25931a1632818f90e4be71d2081c2338fcf656d299949a", + "sha256:3822c5894c72e3b35aae9909bef66ec83e44522faf767c0ad39e0e2de11d3b55", + "sha256:38ba256ee9b310da6a1a0f013ef4e422fca30a685bcbec86a969bd520504e341", + "sha256:3bc3b1621b979621cee9f7b09f024ec76ec03cc365e638126a056317470bde1b", + "sha256:3d2d7d1fff8e09d99354c04c3fd5b560fb04639fd45926b34e27cfdec678a704", + "sha256:517d75522b7b18a3385726b54a081afd425d4f41144a5399e5abd97ccafdf36b", + "sha256:5f79c19c6420962eb17c7e48878a03053b7ccd7b69f389d5831c0a4a7f1ac0a1", + "sha256:5f841c4f14331fd1e36cbf3336ed7be2cb2a8f110ce40ea253e5573387db7621", + "sha256:637c1896497ff19e1ee27c1c2c2ddaa9f2d134bbb5e0c52254361ea20486418d", + "sha256:6ee908c070020d682e9b42c8f621e8bb10c767d04416e2ebe44e37d0f44d9ad5", + "sha256:77f0fb7200cc7dedda7a60912f2059086e29ff67cefbc58d2506638c1a9132d7", + "sha256:7878b61c867fb2df7a95e44b316f88d5a3742390c99dfba6c557a21b30180cac", + "sha256:78c106b2b506b4d895ddc801ff509f941119394b89c9115580014127414e6c2d", + "sha256:8b911d74acdc1fe2941e59b4f1a278a330e9c34c6c8ca1ee21264c51ec9b67ef", + "sha256:93de39267c4c676c9ebb2057e98a8138bade0d806aad4d864322eee0803140a0", + "sha256:9416cf11bcd73c861267e88aea71e9fcc35302b3943e45e1dbb4317f91a4b34f", + "sha256:94b117e27efd8e08b4046c57461d5a114d26b40824995a2eb58372b94f9fca02", + "sha256:9815765f9dcda04921ba467957be543423e5ec6a1136135d84f2ae092c50d87b", + "sha256:98ec9aea6223adf46999f22e2c0ab6cf33f5914be604a404f658386a8f1fba37", + "sha256:a37e9a68349f6abe24130846e2f1d2e38f7ddab30b81b754e5a1fde32f782b23", + "sha256:a43616aec0f0d53c411582c451f5d3e1123a68cc7b3475d6f7d97a626f8ff90d", + "sha256:a4771d0d0ac9d9fe9e24e33bed482a13dfc1256d008d101485fe460359476065", + "sha256:a5635bcf1b75f0f6ef3c8a1ad07b500104a971e38d3683167b9454cb6465ac86", + "sha256:a9acb76d5f3dd9421874923da2ed1e76041cb51b9337fd7f507edde1d86535d6", + "sha256:ac42181292099d91217a82e3fa3ce0e0ddf3a74fd891b7c2b347a7f5aa0edded", + "sha256:b227345e4186809d31f22087d0265655114af7cda442ecaf72246275865bebe4", + "sha256:b61f85101ef08cbbc37846ac0e43f027f7844f3fade9b7f6dd087178caedeee7", + "sha256:b70913cbf2e14275013be98a06ef4b412329fe7b4f83d64eb70dce8269ed1e1a", + "sha256:b9aad49466b8d828b96b9e3630006234879c8d3e2b0a9d99219b3121bc5cdb17", + "sha256:baf1856fab8212bf35230c019cde7c641887e3fc08cadd39d32a421a30151ea3", + "sha256:bd6c9c50bf2ad3f0448edaa1a3b55b2e6866ef8feca5d8dbec10ec7c94371d21", + "sha256:c1ff762e2ee126e6f1258650ac641e2b8e1f3d927a925aafcfde943b77a36d24", + "sha256:c30ac9f562106cd9e8071c23949a067b10211917fdcb75b4718cf5775356a940", + "sha256:c9631c642e08b9fff1c6255487e62971d8b8e821808ddd013d8ac058087591ac", + "sha256:cdd68778f96216596218b4e8882944d24a634d984ee1a5a049b300377878fa7c", + "sha256:ce8cacda0b679ebc25624d5de66c705bc53dcc7c6f02a7fb0f3ca5e227d80422", + "sha256:cfde464ca4af42a629648c0b0d79b8f295cf5b695412451716531d6916461628", + "sha256:d3def943bfd5f1c47d51fd324df1e806d8da1f8e105cc7f1c76a1daf0f7e17b0", + "sha256:d9b668c065968c5979fe6b6fa6760bb6ab9aeb94b75b73c0a9c1acf6393ac3bf", + "sha256:da7d57ea65744d249427793c042094c4016789eb2562576fb831870f9c878d9e", + "sha256:dc3a866cf6c13d59a01878cd806f219340f3e82eed514485e094321f24900677", + 
"sha256:df23c83398715b26ab09574217ca21e14694917a0c857e356fd39e1c64f8283f", + "sha256:dfc924a7e946dd3c6360e50e8f750d51e3ef5395c95dc054bc9eab0f70df4f9c", + "sha256:e4a67f1080123de76e4e97a18d10350df6a7182e243312426d508712e99988d4", + "sha256:e5283c0a00f48e8cafcecadebfa0ed1dac8b39e295c7248c44c665c16dc1138b", + "sha256:e58a9b5cc96e014ddf93c2227cbdeca94b56a7eb77300205d6e4001805391747", + "sha256:e6453f3cbeb78440747096f239d282cc57a2997a16b5197c9bc839099e1633d0", + "sha256:e6c4fa1ec16e01e292315ba76eb1d012c025b99d22896bd14a66628b245e3e01", + "sha256:e7d81ce5744757d2f05fc41896e3b2ae0458464b14b5a2c1e87a6a9d69aefaa8", + "sha256:ea21d4d5104b4f840b91d9dc8cbc832aba9612121eaba503e54eaab1ad140eb9", + "sha256:ecc99bce8ee42dcad15848c7885197d26841cb24fa2ee6e89d23b8993c871c64", + "sha256:f0bb0973f42ffcb5e3537548e0767079420aefd94ba990b61cf7bb8d47f4916d", + "sha256:f19001e790013ed580abfde2a4465388950728861b52f0da73e8e8a9418533c0", + "sha256:f76440e480c3b2ca7f843ff8a48dc82446b86ed4930552d736c0bac507498a52", + "sha256:f9bef5cff994ca3026fcc90680e326d1a19df9841c5e3d224076407cc21471a1", + "sha256:fc66d4016f6e50ed36fb39cd287a3878ffcebfa90008535c62e0e90a7ab713ae", + "sha256:fd77c8f3cba815aa69cb97ee2b2ef385c7c12ada9c734b0f3b32e26bb88bbf1d" + ], + "markers": "python_version >= '3.6'", + "version": "==5.2.0" + }, + "numpy": { + "hashes": [ + "sha256:011e4c430f2e2739e0d182cb7e2b5d47adc46a8db49a788e5798805b7878c4ba", + "sha256:013fa3500a6e5b3ba51401056aa9c41d83a7e737959d15f288d410f26cc33896", + "sha256:0ebb646ef72a2348036ed1692e6bb3f3dd4f8d026681b7168a9ac988d9832c27", + "sha256:21613822dd597d4645c586ac21910fded5344f843410dace91c129a38c31d8be", + "sha256:2242fa31413e40847016234485f228fa5e082b0c555d3db65fe9aa4efcfb8d8d", + "sha256:2934fb435d85341efb40f9db637a203a042300afdaa49f833608df21a5d8ae30", + "sha256:56109e7e9b205439990e90682163d8155cf5743efe65c30221ef3834621ffd3f", + "sha256:5e56515f5abb493bd32d2196ecd3ce794792419adfb7d8b4cccd4ddaf74ab924", + "sha256:6730a1495f1acedd97e82e32cca4d8dbe07b89f01f395ca02ca4a9e110d9519d", + "sha256:6759e6dafd96454be2d6dd80674293322191639400832688cd234c5f483ce1a9", + "sha256:7dbfa0abe053afbcb9e61ec1557556e4e30c3e4b5df4ec7849bf245e8c09feec", + "sha256:8c5016694b9bda77cda32ebfdde34d2246978ed4c49e9baab26bcf38621b7390", + "sha256:91bb1e29d74a90861e878b0c7bc941a1c0ac051cb4b171dc242e66953c95ca1e", + "sha256:a2dd58beb8a8266d704a76692e8eb76ff20f5b2940db7aeee216c2dbf226e5c6", + "sha256:b00d9bf43cc8975cf5e0c211d218e75a3f5ce1ae34dc84d8a489c28a0dba7848", + "sha256:b0ed56b9d7535d654d2a0478333cc08d1b9849767eafd07e1f6a3d8d90a2cad0", + "sha256:bc991b3f8ea7c0f6703df2bc23c098cfe6f1a3a5e8a3a901eb6a5619275d53ff", + "sha256:ccf027e3bbcd06b5c26a0196ddfc24c4d09d2001cc5d38738efff9d9ac8dee58", + "sha256:d0be0eb7df39f0e0732d73250de55e1dcc8086c23db970d5eab85dbf0713502d", + "sha256:e48368972e0999af098e0a6e9a3573895fd4c3b0b2d8c5cf215b17910cd6c124", + "sha256:e981667470ae74f06cfd0d54c5fa9cd88661a27eccaac2cba505039f0b29dc2e", + "sha256:eb6dd744a9f94b424bf70d62b7874798ea95b6b58fb63ec651b69a46872e5bd5" + ], + "markers": "python_version >= '3.8'", + "version": "==1.22.0rc1" + }, + "packaging": { + "hashes": [ + "sha256:dd47c42927d89ab911e606518907cc2d3a1f38bbd026385970643f9c5b8ecfeb", + "sha256:ef103e05f519cdc783ae24ea4e2e0f508a9c99b2d4969652eed6a2e1ea5bd522" + ], + "markers": "python_version >= '3.6'", + "version": "==21.3" + }, + "pygments": { + "hashes": [ + "sha256:b8e67fe6af78f492b3c4b3e2970c0624cbf08beb1e493b2c99b9fa1b67a20380", + "sha256:f398865f7eb6874156579fdf36bc840a03cab64d1cde9e93d68f46a425ec52c6" + ], + 
"markers": "python_version >= '3.5'", + "version": "==2.10.0" + }, + "pypandoc": { + "hashes": [ + "sha256:080903342d8cca6d953835c103b0f280a6cb66a6a20102692143a138b046c44f", + "sha256:6ea03c8e92d561b0b47ff91ee9a777c50a8d3a325f3272ea4fefef4bb6562b91" + ], + "index": "pypi", + "version": "==1.6.4" + }, + "pyparsing": { + "hashes": [ + "sha256:04ff808a5b90911829c55c4e26f75fa5ca8a2f5f36aa3a51f68e27033341d3e4", + "sha256:d9bdec0013ef1eb5a84ab39a3b3868911598afa494f5faa038647101504e2b81" + ], + "markers": "python_version >= '3.6'", + "version": "==3.0.6" + }, + "python-dateutil": { + "hashes": [ + "sha256:0123cacc1627ae19ddf3c27a5de5bd67ee4586fbdd6440d9748f8abb483d3e86", + "sha256:961d03dc3453ebbc59dbdea9e4e11c5651520a876d0f4db161e8674aae935da9" + ], + "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'", + "version": "==2.8.2" + }, + "pytidylib": { + "hashes": [ + "sha256:22b1c8d75970d8064ff999c2369e98af1d0685417eda4c829a5c9f56764b0af3" + ], + "index": "pypi", + "version": "==0.3.2" + }, + "pytz": { + "hashes": [ + "sha256:3672058bc3453457b622aab7a1c3bfd5ab0bdae451512f6cf25f64ed37f5b87c", + "sha256:acad2d8b20a1af07d4e4c9d2e9285c5ed9104354062f275f3fcd88dcef4f1326" + ], + "index": "pypi", + "version": "==2021.3" + }, + "pyyaml": { + "hashes": [ + "sha256:0283c35a6a9fbf047493e3a0ce8d79ef5030852c51e9d911a27badfde0605293", + "sha256:055d937d65826939cb044fc8c9b08889e8c743fdc6a32b33e2390f66013e449b", + "sha256:07751360502caac1c067a8132d150cf3d61339af5691fe9e87803040dbc5db57", + "sha256:0b4624f379dab24d3725ffde76559cff63d9ec94e1736b556dacdfebe5ab6d4b", + "sha256:0ce82d761c532fe4ec3f87fc45688bdd3a4c1dc5e0b4a19814b9009a29baefd4", + "sha256:1e4747bc279b4f613a09eb64bba2ba602d8a6664c6ce6396a4d0cd413a50ce07", + "sha256:213c60cd50106436cc818accf5baa1aba61c0189ff610f64f4a3e8c6726218ba", + "sha256:231710d57adfd809ef5d34183b8ed1eeae3f76459c18fb4a0b373ad56bedcdd9", + "sha256:277a0ef2981ca40581a47093e9e2d13b3f1fbbeffae064c1d21bfceba2030287", + "sha256:2cd5df3de48857ed0544b34e2d40e9fac445930039f3cfe4bcc592a1f836d513", + "sha256:40527857252b61eacd1d9af500c3337ba8deb8fc298940291486c465c8b46ec0", + "sha256:473f9edb243cb1935ab5a084eb238d842fb8f404ed2193a915d1784b5a6b5fc0", + "sha256:48c346915c114f5fdb3ead70312bd042a953a8ce5c7106d5bfb1a5254e47da92", + "sha256:50602afada6d6cbfad699b0c7bb50d5ccffa7e46a3d738092afddc1f9758427f", + "sha256:68fb519c14306fec9720a2a5b45bc9f0c8d1b9c72adf45c37baedfcd949c35a2", + "sha256:77f396e6ef4c73fdc33a9157446466f1cff553d979bd00ecb64385760c6babdc", + "sha256:819b3830a1543db06c4d4b865e70ded25be52a2e0631ccd2f6a47a2822f2fd7c", + "sha256:897b80890765f037df3403d22bab41627ca8811ae55e9a722fd0392850ec4d86", + "sha256:98c4d36e99714e55cfbaaee6dd5badbc9a1ec339ebfc3b1f52e293aee6bb71a4", + "sha256:9df7ed3b3d2e0ecfe09e14741b857df43adb5a3ddadc919a2d94fbdf78fea53c", + "sha256:9fa600030013c4de8165339db93d182b9431076eb98eb40ee068700c9c813e34", + "sha256:a80a78046a72361de73f8f395f1f1e49f956c6be882eed58505a15f3e430962b", + "sha256:b3d267842bf12586ba6c734f89d1f5b871df0273157918b0ccefa29deb05c21c", + "sha256:b5b9eccad747aabaaffbc6064800670f0c297e52c12754eb1d976c57e4f74dcb", + "sha256:c5687b8d43cf58545ade1fe3e055f70eac7a5a1a0bf42824308d868289a95737", + "sha256:cba8c411ef271aa037d7357a2bc8f9ee8b58b9965831d9e51baf703280dc73d3", + "sha256:d15a181d1ecd0d4270dc32edb46f7cb7733c7c508857278d3d378d14d606db2d", + "sha256:d4db7c7aef085872ef65a8fd7d6d09a14ae91f691dec3e87ee5ee0539d516f53", + "sha256:d4eccecf9adf6fbcc6861a38015c2a64f38b9d94838ac1810a9023a0609e1b78", + 
"sha256:d67d839ede4ed1b28a4e8909735fc992a923cdb84e618544973d7dfc71540803", + "sha256:daf496c58a8c52083df09b80c860005194014c3698698d1a57cbcfa182142a3a", + "sha256:e61ceaab6f49fb8bdfaa0f92c4b57bcfbea54c09277b1b4f7ac376bfb7a7c174", + "sha256:f84fbc98b019fef2ee9a1cb3ce93e3187a6df0b2538a651bfb890254ba9f90b5" + ], + "index": "pypi", + "version": "==6.0" + }, + "requests": { + "hashes": [ + "sha256:6c1246513ecd5ecd4528a0906f910e8f0f9c6b8ec72030dc9fd154dc1a6efd24", + "sha256:b8aa58f8cf793ffd8782d3d8cb19e66ef36f7aba4353eec859e74678b01b07a7" + ], + "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4, 3.5'", + "version": "==2.26.0" + }, + "requests-file": { + "hashes": [ + "sha256:07d74208d3389d01c38ab89ef403af0cfec63957d53a0081d8eca738d0247d8e", + "sha256:dfe5dae75c12481f68ba353183c53a65e6044c923e64c24b2209f6c7570ca953" + ], + "version": "==1.5.1" + }, + "sgmllib3k": { + "hashes": [ + "sha256:7868fb1c8bfa764c1ac563d3cf369c381d1325d36124933a726f29fcdaa812e9" + ], + "version": "==1.0.0" + }, + "simhash": { + "hashes": [ + "sha256:0245b465fbe0bd17a74f5b89b9a70c3061984e37d7d94214eb5a8ef545384b6d", + "sha256:18d9c476d1bec9fa039293e4659ef49976585f9e051cb78afec30c4ce8fa361a", + "sha256:a4f84ac68b9afff17c9f1e6046ba60ed5eff40578ddf8d6a3d54709c44fafea0", + "sha256:d486d44a1dde0245d0733b91c86d892e87a062c932a372d184f4d9ce970e2708", + "sha256:debaf4fff92f192dc0414f31fda1ef90069936b3d05ec520d2c790128c48ee9a" + ], + "index": "pypi", + "version": "==2.0.0" + }, + "six": { + "hashes": [ + "sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926", + "sha256:8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254" + ], + "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'", + "version": "==1.16.0" + }, + "snowballstemmer": { + "hashes": [ + "sha256:09b16deb8547d3412ad7b590689584cd0fe25ec8db3be37788be3810cbf19cb1", + "sha256:c8e1716e83cc398ae16824e5572ae04e0d9fc2c6b985fb0f900f5f0c96ecba1a" + ], + "version": "==2.2.0" + }, + "soupsieve": { + "hashes": [ + "sha256:1a3cca2617c6b38c0343ed661b1fa5de5637f257d4fe22bd9f1338010a1efefb", + "sha256:b8d49b1cd4f037c7082a9683dfa1801aa2597fb11c3a1155b7a5b94829b4f1f9" + ], + "markers": "python_version >= '3.6'", + "version": "==2.3.1" + }, + "sphinx": { + "hashes": [ + "sha256:048dac56039a5713f47a554589dc98a442b39226a2b9ed7f82797fcb2fe9253f", + "sha256:32a5b3e9a1b176cc25ed048557d4d3d01af635e6b76c5bc7a43b0a34447fbd45" + ], + "markers": "python_version >= '3.6'", + "version": "==4.3.1" + }, + "sphinx-rtd-theme": { + "hashes": [ + "sha256:4d35a56f4508cfee4c4fb604373ede6feae2a306731d533f409ef5c3496fdbd8", + "sha256:eec6d497e4c2195fa0e8b2016b337532b8a699a68bcb22a512870e16925c6a5c" + ], + "index": "pypi", + "version": "==1.0.0" + }, + "sphinxcontrib-applehelp": { + "hashes": [ + "sha256:806111e5e962be97c29ec4c1e7fe277bfd19e9652fb1a4392105b43e01af885a", + "sha256:a072735ec80e7675e3f432fcae8610ecf509c5f1869d17e2eecff44389cdbc58" + ], + "markers": "python_version >= '3.5'", + "version": "==1.0.2" + }, + "sphinxcontrib-devhelp": { + "hashes": [ + "sha256:8165223f9a335cc1af7ffe1ed31d2871f325254c0423bc0c4c7cd1c1e4734a2e", + "sha256:ff7f1afa7b9642e7060379360a67e9c41e8f3121f2ce9164266f61b9f4b338e4" + ], + "markers": "python_version >= '3.5'", + "version": "==1.0.2" + }, + "sphinxcontrib-htmlhelp": { + "hashes": [ + "sha256:d412243dfb797ae3ec2b59eca0e52dac12e75a241bf0e4eb861e450d06c6ed07", + "sha256:f5f8bb2d0d629f398bf47d0d69c07bc13b65f75a81ad9e2f71a63d4b7a2f6db2" + ], + "markers": "python_version >= '3.6'", + 
"version": "==2.0.0" + }, + "sphinxcontrib-jsmath": { + "hashes": [ + "sha256:2ec2eaebfb78f3f2078e73666b1415417a116cc848b72e5172e596c871103178", + "sha256:a9925e4a4587247ed2191a22df5f6970656cb8ca2bd6284309578f2153e0c4b8" + ], + "markers": "python_version >= '3.5'", + "version": "==1.0.1" + }, + "sphinxcontrib-qthelp": { + "hashes": [ + "sha256:4c33767ee058b70dba89a6fc5c1892c0d57a54be67ddd3e7875a18d14cba5a72", + "sha256:bd9fc24bcb748a8d51fd4ecaade681350aa63009a347a8c14e637895444dfab6" + ], + "markers": "python_version >= '3.5'", + "version": "==1.0.3" + }, + "sphinxcontrib-serializinghtml": { + "hashes": [ + "sha256:352a9a00ae864471d3a7ead8d7d79f5fc0b57e8b3f95e9867eb9eb28999b92fd", + "sha256:aa5f6de5dfdf809ef505c4895e51ef5c9eac17d0f287933eb49ec495280b6952" + ], + "markers": "python_version >= '3.5'", + "version": "==1.1.5" + }, + "tika": { + "hashes": [ + "sha256:c2c50f405622f74531841104f9e85c17511aede11de8e5385eab1a29a31f191b" + ], + "index": "pypi", + "version": "==1.24" + }, + "tldextract": { + "hashes": [ + "sha256:d2034c3558651f7d8fdadea83fb681050b2d662dc67a00d950326dc902029444", + "sha256:f55e05f6bf4cc952a87d13594386d32ad2dd265630a8bdfc3df03bd60425c6b0" + ], + "index": "pypi", + "version": "==3.1.2" + }, + "types-pyyaml": { + "hashes": [ + "sha256:2e27b0118ca4248a646101c5c318dc02e4ca2866d6bc42e84045dbb851555a76", + "sha256:d5b318269652e809b5c30a5fe666c50159ab80bfd41cd6bafe655bf20b29fcba" + ], + "index": "pypi", + "version": "==6.0.1" + }, + "typing-extensions": { + "hashes": [ + "sha256:2cdf80e4e04866a9b3689a51869016d36db0814d84b8d8a568d22781d45d27ed", + "sha256:829704698b22e13ec9eaf959122315eabb370b0884400e9818334d8b677023d9" + ], + "markers": "python_version >= '3.6'", + "version": "==4.0.0" + }, + "urllib3": { + "hashes": [ + "sha256:4987c65554f7a2dbf30c18fd48778ef124af6fab771a377103da0585e2336ece", + "sha256:c4fdf4019605b6e5423637e01bc9fe4daef873709a7973e195ceba0a62bbc844" + ], + "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4' and python_version < '4'", + "version": "==1.26.7" + }, + "voluptuous": { + "hashes": [ + "sha256:4db1ac5079db9249820d49c891cb4660a6f8cae350491210abce741fabf56513" + ], + "index": "pypi", + "version": "==0.12.2" + }, + "wheel": { + "hashes": [ + "sha256:21014b2bd93c6d0034b6ba5d35e4eb284340e09d63c59aef6fc14b0f346146fd", + "sha256:e2ef7239991699e3355d54f8e968a21bb940a1dbf34a4d226741e64462516fad" + ], + "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4'", + "version": "==0.37.0" + }, + "yarl": { + "hashes": [ + "sha256:044daf3012e43d4b3538562da94a88fb12a6490652dbc29fb19adfa02cf72eac", + "sha256:0cba38120db72123db7c58322fa69e3c0efa933040ffb586c3a87c063ec7cae8", + "sha256:167ab7f64e409e9bdd99333fe8c67b5574a1f0495dcfd905bc7454e766729b9e", + "sha256:1be4bbb3d27a4e9aa5f3df2ab61e3701ce8fcbd3e9846dbce7c033a7e8136746", + "sha256:1ca56f002eaf7998b5fcf73b2421790da9d2586331805f38acd9997743114e98", + "sha256:1d3d5ad8ea96bd6d643d80c7b8d5977b4e2fb1bab6c9da7322616fd26203d125", + "sha256:1eb6480ef366d75b54c68164094a6a560c247370a68c02dddb11f20c4c6d3c9d", + "sha256:1edc172dcca3f11b38a9d5c7505c83c1913c0addc99cd28e993efeaafdfaa18d", + "sha256:211fcd65c58bf250fb994b53bc45a442ddc9f441f6fec53e65de8cba48ded986", + "sha256:29e0656d5497733dcddc21797da5a2ab990c0cb9719f1f969e58a4abac66234d", + "sha256:368bcf400247318382cc150aaa632582d0780b28ee6053cd80268c7e72796dec", + "sha256:39d5493c5ecd75c8093fa7700a2fb5c94fe28c839c8e40144b7ab7ccba6938c8", + 
"sha256:3abddf0b8e41445426d29f955b24aeecc83fa1072be1be4e0d194134a7d9baee", + "sha256:3bf8cfe8856708ede6a73907bf0501f2dc4e104085e070a41f5d88e7faf237f3", + "sha256:3ec1d9a0d7780416e657f1e405ba35ec1ba453a4f1511eb8b9fbab81cb8b3ce1", + "sha256:45399b46d60c253327a460e99856752009fcee5f5d3c80b2f7c0cae1c38d56dd", + "sha256:52690eb521d690ab041c3919666bea13ab9fbff80d615ec16fa81a297131276b", + "sha256:534b047277a9a19d858cde163aba93f3e1677d5acd92f7d10ace419d478540de", + "sha256:580c1f15500e137a8c37053e4cbf6058944d4c114701fa59944607505c2fe3a0", + "sha256:59218fef177296451b23214c91ea3aba7858b4ae3306dde120224cfe0f7a6ee8", + "sha256:5ba63585a89c9885f18331a55d25fe81dc2d82b71311ff8bd378fc8004202ff6", + "sha256:5bb7d54b8f61ba6eee541fba4b83d22b8a046b4ef4d8eb7f15a7e35db2e1e245", + "sha256:6152224d0a1eb254f97df3997d79dadd8bb2c1a02ef283dbb34b97d4f8492d23", + "sha256:67e94028817defe5e705079b10a8438b8cb56e7115fa01640e9c0bb3edf67332", + "sha256:695ba021a9e04418507fa930d5f0704edbce47076bdcfeeaba1c83683e5649d1", + "sha256:6a1a9fe17621af43e9b9fcea8bd088ba682c8192d744b386ee3c47b56eaabb2c", + "sha256:6ab0c3274d0a846840bf6c27d2c60ba771a12e4d7586bf550eefc2df0b56b3b4", + "sha256:6feca8b6bfb9eef6ee057628e71e1734caf520a907b6ec0d62839e8293e945c0", + "sha256:737e401cd0c493f7e3dd4db72aca11cfe069531c9761b8ea474926936b3c57c8", + "sha256:788713c2896f426a4e166b11f4ec538b5736294ebf7d5f654ae445fd44270832", + "sha256:797c2c412b04403d2da075fb93c123df35239cd7b4cc4e0cd9e5839b73f52c58", + "sha256:8300401dc88cad23f5b4e4c1226f44a5aa696436a4026e456fe0e5d2f7f486e6", + "sha256:87f6e082bce21464857ba58b569370e7b547d239ca22248be68ea5d6b51464a1", + "sha256:89ccbf58e6a0ab89d487c92a490cb5660d06c3a47ca08872859672f9c511fc52", + "sha256:8b0915ee85150963a9504c10de4e4729ae700af11df0dc5550e6587ed7891e92", + "sha256:8cce6f9fa3df25f55521fbb5c7e4a736683148bcc0c75b21863789e5185f9185", + "sha256:95a1873b6c0dd1c437fb3bb4a4aaa699a48c218ac7ca1e74b0bee0ab16c7d60d", + "sha256:9b4c77d92d56a4c5027572752aa35082e40c561eec776048330d2907aead891d", + "sha256:9bfcd43c65fbb339dc7086b5315750efa42a34eefad0256ba114cd8ad3896f4b", + "sha256:9c1f083e7e71b2dd01f7cd7434a5f88c15213194df38bc29b388ccdf1492b739", + "sha256:a1d0894f238763717bdcfea74558c94e3bc34aeacd3351d769460c1a586a8b05", + "sha256:a467a431a0817a292121c13cbe637348b546e6ef47ca14a790aa2fa8cc93df63", + "sha256:aa32aaa97d8b2ed4e54dc65d241a0da1c627454950f7d7b1f95b13985afd6c5d", + "sha256:ac10bbac36cd89eac19f4e51c032ba6b412b3892b685076f4acd2de18ca990aa", + "sha256:ac35ccde589ab6a1870a484ed136d49a26bcd06b6a1c6397b1967ca13ceb3913", + "sha256:bab827163113177aee910adb1f48ff7af31ee0289f434f7e22d10baf624a6dfe", + "sha256:baf81561f2972fb895e7844882898bda1eef4b07b5b385bcd308d2098f1a767b", + "sha256:bf19725fec28452474d9887a128e98dd67eee7b7d52e932e6949c532d820dc3b", + "sha256:c01a89a44bb672c38f42b49cdb0ad667b116d731b3f4c896f72302ff77d71656", + "sha256:c0910c6b6c31359d2f6184828888c983d54d09d581a4a23547a35f1d0b9484b1", + "sha256:c10ea1e80a697cf7d80d1ed414b5cb8f1eec07d618f54637067ae3c0334133c4", + "sha256:c1164a2eac148d85bbdd23e07dfcc930f2e633220f3eb3c3e2a25f6148c2819e", + "sha256:c145ab54702334c42237a6c6c4cc08703b6aa9b94e2f227ceb3d477d20c36c63", + "sha256:c17965ff3706beedafd458c452bf15bac693ecd146a60a06a214614dc097a271", + "sha256:c19324a1c5399b602f3b6e7db9478e5b1adf5cf58901996fc973fe4fccd73eed", + "sha256:c2a1ac41a6aa980db03d098a5531f13985edcb451bcd9d00670b03129922cd0d", + "sha256:c6ddcd80d79c96eb19c354d9dca95291589c5954099836b7c8d29278a7ec0bda", + "sha256:c9c6d927e098c2d360695f2e9d38870b2e92e0919be07dbe339aefa32a090265", + 
"sha256:cc8b7a7254c0fc3187d43d6cb54b5032d2365efd1df0cd1749c0c4df5f0ad45f", + "sha256:cff3ba513db55cc6a35076f32c4cdc27032bd075c9faef31fec749e64b45d26c", + "sha256:d260d4dc495c05d6600264a197d9d6f7fc9347f21d2594926202fd08cf89a8ba", + "sha256:d6f3d62e16c10e88d2168ba2d065aa374e3c538998ed04996cd373ff2036d64c", + "sha256:da6df107b9ccfe52d3a48165e48d72db0eca3e3029b5b8cb4fe6ee3cb870ba8b", + "sha256:dfe4b95b7e00c6635a72e2d00b478e8a28bfb122dc76349a06e20792eb53a523", + "sha256:e39378894ee6ae9f555ae2de332d513a5763276a9265f8e7cbaeb1b1ee74623a", + "sha256:ede3b46cdb719c794427dcce9d8beb4abe8b9aa1e97526cc20de9bd6583ad1ef", + "sha256:f2a8508f7350512434e41065684076f640ecce176d262a7d54f0da41d99c5a95", + "sha256:f44477ae29025d8ea87ec308539f95963ffdc31a82f42ca9deecf2d505242e72", + "sha256:f64394bd7ceef1237cc604b5a89bf748c95982a84bcd3c4bbeb40f685c810794", + "sha256:fc4dd8b01a8112809e6b636b00f487846956402834a7fd59d46d4f4267181c41", + "sha256:fce78593346c014d0d986b7ebc80d782b7f5e19843ca798ed62f8e3ba8728576", + "sha256:fd547ec596d90c8676e369dd8a581a21227fe9b4ad37d0dc7feb4ccf544c2d59" + ], + "markers": "python_version >= '3.6'", + "version": "==1.7.2" + } + }, + "develop": { + "alabaster": { + "hashes": [ + "sha256:446438bdcca0e05bd45ea2de1668c1d9b032e1a9154c2c259092d77031ddd359", + "sha256:a661d72d58e6ea8a57f7a86e37d86716863ee5e92788398526d58b26a4e4dc02" + ], + "version": "==0.7.12" + }, + "attrs": { + "hashes": [ + "sha256:149e90d6d8ac20db7a955ad60cf0e6881a3f20d37096140088356da6c716b0b1", + "sha256:ef6aaac3ca6cd92904cdd0d83f629a15f18053ec84e6432106f7a4d04ae4f5fb" + ], + "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4'", + "version": "==21.2.0" + }, + "autoflake": { + "hashes": [ + "sha256:61a353012cff6ab94ca062823d1fb2f692c4acda51c76ff83a8d77915fba51ea" + ], + "index": "pypi", + "version": "==1.4" + }, + "babel": { + "hashes": [ + "sha256:ab49e12b91d937cd11f0b67cb259a57ab4ad2b59ac7a3b41d6c06c0ac5b0def9", + "sha256:bc0c176f9f6a994582230df350aa6e05ba2ebe4b3ac317eab29d9be5d2768da0" + ], + "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'", + "version": "==2.9.1" + }, + "backports.entry-points-selectable": { + "hashes": [ + "sha256:7fceed9532a7aa2bd888654a7314f864a3c16a4e710b34a58cfc0f08114c663b", + "sha256:914b21a479fde881635f7af5adc7f6e38d6b274be32269070c53b698c60d5386" + ], + "markers": "python_version >= '2.7'", + "version": "==1.1.1" + }, + "black": { + "hashes": [ + "sha256:802c6c30b637b28645b7fde282ed2569c0cd777dbe493a41b6a03c1d903f99ac", + "sha256:a042adbb18b3262faad5aff4e834ff186bb893f95ba3a8013f09de1e5569def2" + ], + "index": "pypi", + "version": "==21.11b1" + }, + "blacken-docs": { + "hashes": [ + "sha256:3e8138b22c33406cef5946058e535a8aca45cd64b8e7d392b3bd1329fc1f4af8", + "sha256:a81e0abc9771521f445ee582f469c8ec2f5880c19c369d766bb151f79f642d7b" + ], + "index": "pypi", + "version": "==1.12.0" + }, + "certifi": { + "hashes": [ + "sha256:78884e7c1d4b00ce3cea67b44566851c4343c120abd683433ce934a68ea58872", + "sha256:d62a0163eb4c2344ac042ab2bdf75399a71a2d8c7d47eac2e2ee91b9d6339569" + ], + "version": "==2021.10.8" + }, + "cfgv": { + "hashes": [ + "sha256:c6a0883f3917a037485059700b9e75da2464e6c27051014ad85ba6aaa5884426", + "sha256:f5a830efb9ce7a445376bb66ec94c638a9787422f96264c98edc6bdeed8ab736" + ], + "markers": "python_full_version >= '3.6.1'", + "version": "==3.3.1" + }, + "charset-normalizer": { + "hashes": [ + "sha256:735e240d9a8506778cd7a453d97e817e536bb1fc29f4f6961ce297b9c7a917b0", + 
"sha256:83fcdeb225499d6344c8f7f34684c2981270beacc32ede2e669e94f7fa544405" + ], + "markers": "python_version >= '3'", + "version": "==2.0.8" + }, + "click": { + "hashes": [ + "sha256:d2b5255c7c6349bc1bd1e59e08cd12acbbd63ce649f2588755783aa94dfb6b1a", + "sha256:dacca89f4bfadd5de3d7489b7c8a566eee0d3676333fbb50030263894c38c0dc" + ], + "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4'", + "version": "==7.1.2" + }, + "colorama": { + "hashes": [ + "sha256:5941b2b48a20143d2267e95b1c2a7603ce057ee39fd88e7329b0c292aa16869b", + "sha256:9f47eda37229f68eee03b24b9748937c7dc3868f906e8ba69fbcbdd3bc5dc3e2" + ], + "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4'", + "version": "==0.4.4" + }, + "distlib": { + "hashes": [ + "sha256:c8b54e8454e5bf6237cc84c20e8264c3e991e824ef27e8f1e81049867d861e31", + "sha256:d982d0751ff6eaaab5e2ec8e691d949ee80eddf01a62eaa96ddb11531fe16b05" + ], + "version": "==0.3.3" + }, + "docutils": { + "hashes": [ + "sha256:686577d2e4c32380bb50cbb22f575ed742d58168cee37e99117a854bcd88f125", + "sha256:cf316c8370a737a022b72b56874f6602acf974a37a9fba42ec2876387549fc61" + ], + "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4'", + "version": "==0.17.1" + }, + "filelock": { + "hashes": [ + "sha256:2e139a228bcf56dd8b2274a65174d005c4a6b68540ee0bdbb92c76f43f29f7e8", + "sha256:93d512b32a23baf4cac44ffd72ccf70732aeff7b8050fcaf6d3ec406d954baf4" + ], + "markers": "python_version >= '3.6'", + "version": "==3.4.0" + }, + "identify": { + "hashes": [ + "sha256:a33ae873287e81651c7800ca309dc1f84679b763c9c8b30680e16fbfa82f0107", + "sha256:eba31ca80258de6bb51453084bff4a923187cd2193b9c13710f2516ab30732cc" + ], + "markers": "python_full_version >= '3.6.1'", + "version": "==2.4.0" + }, + "idna": { + "hashes": [ + "sha256:84d9dd047ffa80596e0f246e2eab0b391788b0503584e8945f2368256d2735ff", + "sha256:9d643ff0a55b762d5cdb124b8eaa99c66322e2157b69160bc32796e824360e6d" + ], + "markers": "python_version >= '3'", + "version": "==3.3" + }, + "imagesize": { + "hashes": [ + "sha256:1db2f82529e53c3e929e8926a1fa9235aa82d0bd0c580359c67ec31b2fddaa8c", + "sha256:cd1750d452385ca327479d45b64d9c7729ecf0b3969a58148298c77092261f9d" + ], + "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'", + "version": "==1.3.0" + }, + "interrogate": { + "hashes": [ + "sha256:a4ccc5cbd727c74acc98dee6f5e79ef264c0bcfa66b68d4e123069b2af89091a", + "sha256:b6f325f0aa84ac3ac6779d8708264d366102226c5af7d69058cecffcff7a6d6c" + ], + "index": "pypi", + "version": "==1.5.0" + }, + "isort": { + "hashes": [ + "sha256:6f62d78e2f89b4500b080fe3a81690850cd254227f27f75c3a0c491a1f351ba7", + "sha256:e8443a5e7a020e9d7f97f1d7d9cd17c88bcb3bc7e218bf9cf5095fe550be2951" + ], + "index": "pypi", + "version": "==5.10.1" + }, + "jinja2": { + "hashes": [ + "sha256:077ce6014f7b40d03b47d1f1ca4b0fc8328a692bd284016f806ed0eaca390ad8", + "sha256:611bb273cd68f3b993fabdc4064fc858c5b47a973cb5aa7999ec1ba405c87cd7" + ], + "markers": "python_version >= '3.6'", + "version": "==3.0.3" + }, + "libcst": { + "hashes": [ + "sha256:2e1f77fbaaff93b889376c92f588b718edbdc21f956abbe27d10dfd1ff2d76c3", + "sha256:330f9082a309bad808e283e80845a843200303bb256690185b98ca458a62c4f8" + ], + "markers": "python_version >= '3.6'", + "version": "==0.3.23" + }, + "markdown-it-py": { + "hashes": [ + "sha256:36be6bb3ad987bfdb839f5ba78ddf094552ca38ccbd784ae4f74a4e1419fc6e3", + "sha256:98080fc0bc34c4f2bcf0846a096a9429acbd9d5d8e67ed34026c03c61c464389" + ], + "markers": 
"python_version ~= '3.6'", + "version": "==1.1.0" + }, + "markupsafe": { + "hashes": [ + "sha256:01a9b8ea66f1658938f65b93a85ebe8bc016e6769611be228d797c9d998dd298", + "sha256:023cb26ec21ece8dc3907c0e8320058b2e0cb3c55cf9564da612bc325bed5e64", + "sha256:0446679737af14f45767963a1a9ef7620189912317d095f2d9ffa183a4d25d2b", + "sha256:04635854b943835a6ea959e948d19dcd311762c5c0c6e1f0e16ee57022669194", + "sha256:0717a7390a68be14b8c793ba258e075c6f4ca819f15edfc2a3a027c823718567", + "sha256:0955295dd5eec6cb6cc2fe1698f4c6d84af2e92de33fbcac4111913cd100a6ff", + "sha256:0d4b31cc67ab36e3392bbf3862cfbadac3db12bdd8b02a2731f509ed5b829724", + "sha256:10f82115e21dc0dfec9ab5c0223652f7197feb168c940f3ef61563fc2d6beb74", + "sha256:168cd0a3642de83558a5153c8bd34f175a9a6e7f6dc6384b9655d2697312a646", + "sha256:1d609f577dc6e1aa17d746f8bd3c31aa4d258f4070d61b2aa5c4166c1539de35", + "sha256:1f2ade76b9903f39aa442b4aadd2177decb66525062db244b35d71d0ee8599b6", + "sha256:20dca64a3ef2d6e4d5d615a3fd418ad3bde77a47ec8a23d984a12b5b4c74491a", + "sha256:2a7d351cbd8cfeb19ca00de495e224dea7e7d919659c2841bbb7f420ad03e2d6", + "sha256:2d7d807855b419fc2ed3e631034685db6079889a1f01d5d9dac950f764da3dad", + "sha256:2ef54abee730b502252bcdf31b10dacb0a416229b72c18b19e24a4509f273d26", + "sha256:36bc903cbb393720fad60fc28c10de6acf10dc6cc883f3e24ee4012371399a38", + "sha256:37205cac2a79194e3750b0af2a5720d95f786a55ce7df90c3af697bfa100eaac", + "sha256:3c112550557578c26af18a1ccc9e090bfe03832ae994343cfdacd287db6a6ae7", + "sha256:3dd007d54ee88b46be476e293f48c85048603f5f516008bee124ddd891398ed6", + "sha256:4296f2b1ce8c86a6aea78613c34bb1a672ea0e3de9c6ba08a960efe0b0a09047", + "sha256:47ab1e7b91c098ab893b828deafa1203de86d0bc6ab587b160f78fe6c4011f75", + "sha256:49e3ceeabbfb9d66c3aef5af3a60cc43b85c33df25ce03d0031a608b0a8b2e3f", + "sha256:4dc8f9fb58f7364b63fd9f85013b780ef83c11857ae79f2feda41e270468dd9b", + "sha256:4efca8f86c54b22348a5467704e3fec767b2db12fc39c6d963168ab1d3fc9135", + "sha256:53edb4da6925ad13c07b6d26c2a852bd81e364f95301c66e930ab2aef5b5ddd8", + "sha256:5855f8438a7d1d458206a2466bf82b0f104a3724bf96a1c781ab731e4201731a", + "sha256:594c67807fb16238b30c44bdf74f36c02cdf22d1c8cda91ef8a0ed8dabf5620a", + "sha256:5b6d930f030f8ed98e3e6c98ffa0652bdb82601e7a016ec2ab5d7ff23baa78d1", + "sha256:5bb28c636d87e840583ee3adeb78172efc47c8b26127267f54a9c0ec251d41a9", + "sha256:60bf42e36abfaf9aff1f50f52644b336d4f0a3fd6d8a60ca0d054ac9f713a864", + "sha256:611d1ad9a4288cf3e3c16014564df047fe08410e628f89805e475368bd304914", + "sha256:6300b8454aa6930a24b9618fbb54b5a68135092bc666f7b06901f897fa5c2fee", + "sha256:63f3268ba69ace99cab4e3e3b5840b03340efed0948ab8f78d2fd87ee5442a4f", + "sha256:6557b31b5e2c9ddf0de32a691f2312a32f77cd7681d8af66c2692efdbef84c18", + "sha256:693ce3f9e70a6cf7d2fb9e6c9d8b204b6b39897a2c4a1aa65728d5ac97dcc1d8", + "sha256:6a7fae0dd14cf60ad5ff42baa2e95727c3d81ded453457771d02b7d2b3f9c0c2", + "sha256:6c4ca60fa24e85fe25b912b01e62cb969d69a23a5d5867682dd3e80b5b02581d", + "sha256:6fcf051089389abe060c9cd7caa212c707e58153afa2c649f00346ce6d260f1b", + "sha256:7d91275b0245b1da4d4cfa07e0faedd5b0812efc15b702576d103293e252af1b", + "sha256:89c687013cb1cd489a0f0ac24febe8c7a666e6e221b783e53ac50ebf68e45d86", + "sha256:8d206346619592c6200148b01a2142798c989edcb9c896f9ac9722a99d4e77e6", + "sha256:905fec760bd2fa1388bb5b489ee8ee5f7291d692638ea5f67982d968366bef9f", + "sha256:97383d78eb34da7e1fa37dd273c20ad4320929af65d156e35a5e2d89566d9dfb", + "sha256:984d76483eb32f1bcb536dc27e4ad56bba4baa70be32fa87152832cdd9db0833", + "sha256:99df47edb6bda1249d3e80fdabb1dab8c08ef3975f69aed437cb69d0a5de1e28", + 
"sha256:9f02365d4e99430a12647f09b6cc8bab61a6564363f313126f775eb4f6ef798e", + "sha256:a30e67a65b53ea0a5e62fe23682cfe22712e01f453b95233b25502f7c61cb415", + "sha256:ab3ef638ace319fa26553db0624c4699e31a28bb2a835c5faca8f8acf6a5a902", + "sha256:aca6377c0cb8a8253e493c6b451565ac77e98c2951c45f913e0b52facdcff83f", + "sha256:add36cb2dbb8b736611303cd3bfcee00afd96471b09cda130da3581cbdc56a6d", + "sha256:b2f4bf27480f5e5e8ce285a8c8fd176c0b03e93dcc6646477d4630e83440c6a9", + "sha256:b7f2d075102dc8c794cbde1947378051c4e5180d52d276987b8d28a3bd58c17d", + "sha256:baa1a4e8f868845af802979fcdbf0bb11f94f1cb7ced4c4b8a351bb60d108145", + "sha256:be98f628055368795d818ebf93da628541e10b75b41c559fdf36d104c5787066", + "sha256:bf5d821ffabf0ef3533c39c518f3357b171a1651c1ff6827325e4489b0e46c3c", + "sha256:c47adbc92fc1bb2b3274c4b3a43ae0e4573d9fbff4f54cd484555edbf030baf1", + "sha256:cdfba22ea2f0029c9261a4bd07e830a8da012291fbe44dc794e488b6c9bb353a", + "sha256:d6c7ebd4e944c85e2c3421e612a7057a2f48d478d79e61800d81468a8d842207", + "sha256:d7f9850398e85aba693bb640262d3611788b1f29a79f0c93c565694658f4071f", + "sha256:d8446c54dc28c01e5a2dbac5a25f071f6653e6e40f3a8818e8b45d790fe6ef53", + "sha256:deb993cacb280823246a026e3b2d81c493c53de6acfd5e6bfe31ab3402bb37dd", + "sha256:e0f138900af21926a02425cf736db95be9f4af72ba1bb21453432a07f6082134", + "sha256:e9936f0b261d4df76ad22f8fee3ae83b60d7c3e871292cd42f40b81b70afae85", + "sha256:f0567c4dc99f264f49fe27da5f735f414c4e7e7dd850cfd8e69f0862d7c74ea9", + "sha256:f5653a225f31e113b152e56f154ccbe59eeb1c7487b39b9d9f9cdb58e6c79dc5", + "sha256:f826e31d18b516f653fe296d967d700fddad5901ae07c622bb3705955e1faa94", + "sha256:f8ba0e8349a38d3001fae7eadded3f6606f0da5d748ee53cc1dab1d6527b9509", + "sha256:f9081981fe268bd86831e5c75f7de206ef275defcb82bc70740ae6dc507aee51", + "sha256:fa130dd50c57d53368c9d59395cb5526eda596d3ffe36666cd81a44d56e48872" + ], + "markers": "python_version >= '3.6'", + "version": "==2.0.1" + }, + "mdit-py-plugins": { + "hashes": [ + "sha256:1833bf738e038e35d89cb3a07eb0d227ed647ce7dd357579b65343740c6d249c", + "sha256:5991cef645502e80a5388ec4fc20885d2313d4871e8b8e320ca2de14ac0c015f" + ], + "markers": "python_version ~= '3.6'", + "version": "==0.2.8" + }, + "mypy": { + "hashes": [ + "sha256:088cd9c7904b4ad80bec811053272986611b84221835e079be5bcad029e79dd9", + "sha256:0aadfb2d3935988ec3815952e44058a3100499f5be5b28c34ac9d79f002a4a9a", + "sha256:119bed3832d961f3a880787bf621634ba042cb8dc850a7429f643508eeac97b9", + "sha256:1a85e280d4d217150ce8cb1a6dddffd14e753a4e0c3cf90baabb32cefa41b59e", + "sha256:3c4b8ca36877fc75339253721f69603a9c7fdb5d4d5a95a1a1b899d8b86a4de2", + "sha256:3e382b29f8e0ccf19a2df2b29a167591245df90c0b5a2542249873b5c1d78212", + "sha256:42c266ced41b65ed40a282c575705325fa7991af370036d3f134518336636f5b", + "sha256:53fd2eb27a8ee2892614370896956af2ff61254c275aaee4c230ae771cadd885", + "sha256:704098302473cb31a218f1775a873b376b30b4c18229421e9e9dc8916fd16150", + "sha256:7df1ead20c81371ccd6091fa3e2878559b5c4d4caadaf1a484cf88d93ca06703", + "sha256:866c41f28cee548475f146aa4d39a51cf3b6a84246969f3759cb3e9c742fc072", + "sha256:a155d80ea6cee511a3694b108c4494a39f42de11ee4e61e72bc424c490e46457", + "sha256:adaeee09bfde366d2c13fe6093a7df5df83c9a2ba98638c7d76b010694db760e", + "sha256:b6fb13123aeef4a3abbcfd7e71773ff3ff1526a7d3dc538f3929a49b42be03f0", + "sha256:b94e4b785e304a04ea0828759172a15add27088520dc7e49ceade7834275bedb", + "sha256:c0df2d30ed496a08de5daed2a9ea807d07c21ae0ab23acf541ab88c24b26ab97", + "sha256:c6c2602dffb74867498f86e6129fd52a2770c48b7cd3ece77ada4fa38f94eba8", + 
"sha256:ceb6e0a6e27fb364fb3853389607cf7eb3a126ad335790fa1e14ed02fba50811", + "sha256:d9dd839eb0dc1bbe866a288ba3c1afc33a202015d2ad83b31e875b5905a079b6", + "sha256:e4dab234478e3bd3ce83bac4193b2ecd9cf94e720ddd95ce69840273bf44f6de", + "sha256:ec4e0cd079db280b6bdabdc807047ff3e199f334050db5cbb91ba3e959a67504", + "sha256:ecd2c3fe726758037234c93df7e98deb257fd15c24c9180dacf1ef829da5f921", + "sha256:ef565033fa5a958e62796867b1df10c40263ea9ded87164d67572834e57a174d" + ], + "index": "pypi", + "version": "==0.910" + }, + "mypy-extensions": { + "hashes": [ + "sha256:090fedd75945a69ae91ce1303b5824f428daf5a028d2f6ab8a299250a846f15d", + "sha256:2d82818f5bb3e369420cb3c4060a7970edba416647068eb4c5343488a6c604a8" + ], + "version": "==0.4.3" + }, + "myst-parser": { + "hashes": [ + "sha256:40124b6f27a4c42ac7f06b385e23a9dcd03d84801e9c7130b59b3729a554b1f9", + "sha256:f7f3b2d62db7655cde658eb5d62b2ec2a4631308137bd8d10f296a40d57bbbeb" + ], + "index": "pypi", + "version": "==0.15.2" + }, + "nodeenv": { + "hashes": [ + "sha256:3ef13ff90291ba2a4a7a4ff9a979b63ffdd00a464dbe04acf0ea6471517a4c2b", + "sha256:621e6b7076565ddcacd2db0294c0381e01fd28945ab36bcf00f41c5daf63bef7" + ], + "version": "==1.6.0" + }, + "packaging": { + "hashes": [ + "sha256:dd47c42927d89ab911e606518907cc2d3a1f38bbd026385970643f9c5b8ecfeb", + "sha256:ef103e05f519cdc783ae24ea4e2e0f508a9c99b2d4969652eed6a2e1ea5bd522" + ], + "markers": "python_version >= '3.6'", + "version": "==21.3" + }, + "pathspec": { + "hashes": [ + "sha256:7d15c4ddb0b5c802d161efc417ec1a2558ea2653c2e8ad9c19098201dc1c993a", + "sha256:e564499435a2673d586f6b2130bb5b95f04a3ba06f81b8f895b651a3c76aabb1" + ], + "version": "==0.9.0" + }, + "platformdirs": { + "hashes": [ + "sha256:367a5e80b3d04d2428ffa76d33f124cf11e8fff2acdaa9b43d545f5c7d661ef2", + "sha256:8868bbe3c3c80d42f20156f22e7131d2fb321f5bc86a2a345375c6481a67021d" + ], + "markers": "python_version >= '3.6'", + "version": "==2.4.0" + }, + "pre-commit": { + "hashes": [ + "sha256:3c25add78dbdfb6a28a651780d5c311ac40dd17f160eb3954a0c59da40a505a7", + "sha256:a4ed01000afcb484d9eb8d504272e642c4c4099bbad3a6b27e519bd6a3e928a6" + ], + "index": "pypi", + "version": "==2.15.0" + }, + "py": { + "hashes": [ + "sha256:51c75c4126074b472f746a24399ad32f6053d1b34b68d2fa41e558e6f4a98719", + "sha256:607c53218732647dff4acdfcd50cb62615cedf612e72d1724fb1a0cc6405b378" + ], + "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4'", + "version": "==1.11.0" + }, + "pybetter": { + "hashes": [ + "sha256:09379896e43b0da9e3b37b3d3aef0bc89d19b2646e023d984ef64e018cd21648", + "sha256:73ddc060e92eb0a192c4d7ba97cf26ea0c525a9d27bae7a917344a0c79d1755b" + ], + "index": "pypi", + "version": "==0.3.7" + }, + "pyemojify": { + "hashes": [ + "sha256:6bbc3c8d52e3df3e4039bc0cad3616d3eb579b4c6e15a11bd5e0ef0d579596a9", + "sha256:e70e4cfcfe0aed7b5bc64f39b023d5d62a5f5c0c31c1b7114cd43a059fb14a72" + ], + "version": "==0.2.0" + }, + "pyflakes": { + "hashes": [ + "sha256:05a85c2872edf37a4ed30b0cce2f6093e1d0581f8c19d7393122da7e25b2b24c", + "sha256:3bb3a3f256f4b7968c9c788781e4ff07dce46bdf12339dcda61053375426ee2e" + ], + "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'", + "version": "==2.4.0" + }, + "pygments": { + "hashes": [ + "sha256:b8e67fe6af78f492b3c4b3e2970c0624cbf08beb1e493b2c99b9fa1b67a20380", + "sha256:f398865f7eb6874156579fdf36bc840a03cab64d1cde9e93d68f46a425ec52c6" + ], + "markers": "python_version >= '3.5'", + "version": "==2.10.0" + }, + "pyparsing": { + "hashes": [ + 
"sha256:04ff808a5b90911829c55c4e26f75fa5ca8a2f5f36aa3a51f68e27033341d3e4", + "sha256:d9bdec0013ef1eb5a84ab39a3b3868911598afa494f5faa038647101504e2b81" + ], + "markers": "python_version >= '3.6'", + "version": "==3.0.6" + }, + "pytz": { + "hashes": [ + "sha256:3672058bc3453457b622aab7a1c3bfd5ab0bdae451512f6cf25f64ed37f5b87c", + "sha256:acad2d8b20a1af07d4e4c9d2e9285c5ed9104354062f275f3fcd88dcef4f1326" + ], + "index": "pypi", + "version": "==2021.3" + }, + "pyyaml": { + "hashes": [ + "sha256:0283c35a6a9fbf047493e3a0ce8d79ef5030852c51e9d911a27badfde0605293", + "sha256:055d937d65826939cb044fc8c9b08889e8c743fdc6a32b33e2390f66013e449b", + "sha256:07751360502caac1c067a8132d150cf3d61339af5691fe9e87803040dbc5db57", + "sha256:0b4624f379dab24d3725ffde76559cff63d9ec94e1736b556dacdfebe5ab6d4b", + "sha256:0ce82d761c532fe4ec3f87fc45688bdd3a4c1dc5e0b4a19814b9009a29baefd4", + "sha256:1e4747bc279b4f613a09eb64bba2ba602d8a6664c6ce6396a4d0cd413a50ce07", + "sha256:213c60cd50106436cc818accf5baa1aba61c0189ff610f64f4a3e8c6726218ba", + "sha256:231710d57adfd809ef5d34183b8ed1eeae3f76459c18fb4a0b373ad56bedcdd9", + "sha256:277a0ef2981ca40581a47093e9e2d13b3f1fbbeffae064c1d21bfceba2030287", + "sha256:2cd5df3de48857ed0544b34e2d40e9fac445930039f3cfe4bcc592a1f836d513", + "sha256:40527857252b61eacd1d9af500c3337ba8deb8fc298940291486c465c8b46ec0", + "sha256:473f9edb243cb1935ab5a084eb238d842fb8f404ed2193a915d1784b5a6b5fc0", + "sha256:48c346915c114f5fdb3ead70312bd042a953a8ce5c7106d5bfb1a5254e47da92", + "sha256:50602afada6d6cbfad699b0c7bb50d5ccffa7e46a3d738092afddc1f9758427f", + "sha256:68fb519c14306fec9720a2a5b45bc9f0c8d1b9c72adf45c37baedfcd949c35a2", + "sha256:77f396e6ef4c73fdc33a9157446466f1cff553d979bd00ecb64385760c6babdc", + "sha256:819b3830a1543db06c4d4b865e70ded25be52a2e0631ccd2f6a47a2822f2fd7c", + "sha256:897b80890765f037df3403d22bab41627ca8811ae55e9a722fd0392850ec4d86", + "sha256:98c4d36e99714e55cfbaaee6dd5badbc9a1ec339ebfc3b1f52e293aee6bb71a4", + "sha256:9df7ed3b3d2e0ecfe09e14741b857df43adb5a3ddadc919a2d94fbdf78fea53c", + "sha256:9fa600030013c4de8165339db93d182b9431076eb98eb40ee068700c9c813e34", + "sha256:a80a78046a72361de73f8f395f1f1e49f956c6be882eed58505a15f3e430962b", + "sha256:b3d267842bf12586ba6c734f89d1f5b871df0273157918b0ccefa29deb05c21c", + "sha256:b5b9eccad747aabaaffbc6064800670f0c297e52c12754eb1d976c57e4f74dcb", + "sha256:c5687b8d43cf58545ade1fe3e055f70eac7a5a1a0bf42824308d868289a95737", + "sha256:cba8c411ef271aa037d7357a2bc8f9ee8b58b9965831d9e51baf703280dc73d3", + "sha256:d15a181d1ecd0d4270dc32edb46f7cb7733c7c508857278d3d378d14d606db2d", + "sha256:d4db7c7aef085872ef65a8fd7d6d09a14ae91f691dec3e87ee5ee0539d516f53", + "sha256:d4eccecf9adf6fbcc6861a38015c2a64f38b9d94838ac1810a9023a0609e1b78", + "sha256:d67d839ede4ed1b28a4e8909735fc992a923cdb84e618544973d7dfc71540803", + "sha256:daf496c58a8c52083df09b80c860005194014c3698698d1a57cbcfa182142a3a", + "sha256:e61ceaab6f49fb8bdfaa0f92c4b57bcfbea54c09277b1b4f7ac376bfb7a7c174", + "sha256:f84fbc98b019fef2ee9a1cb3ce93e3187a6df0b2538a651bfb890254ba9f90b5" + ], + "index": "pypi", + "version": "==6.0" + }, + "regex": { + "hashes": [ + "sha256:0416f7399e918c4b0e074a0f66e5191077ee2ca32a0f99d4c187a62beb47aa05", + "sha256:05b7d6d7e64efe309972adab77fc2af8907bb93217ec60aa9fe12a0dad35874f", + "sha256:0617383e2fe465732af4509e61648b77cbe3aee68b6ac8c0b6fe934db90be5cc", + "sha256:07856afef5ffcc052e7eccf3213317fbb94e4a5cd8177a2caa69c980657b3cb4", + "sha256:0f594b96fe2e0821d026365f72ac7b4f0b487487fb3d4aaf10dd9d97d88a9737", + 
"sha256:139a23d1f5d30db2cc6c7fd9c6d6497872a672db22c4ae1910be22d4f4b2068a", + "sha256:162abfd74e88001d20cb73ceaffbfe601469923e875caf9118333b1a4aaafdc4", + "sha256:2207ae4f64ad3af399e2d30dde66f0b36ae5c3129b52885f1bffc2f05ec505c8", + "sha256:2409b5c9cef7054dde93a9803156b411b677affc84fca69e908b1cb2c540025d", + "sha256:2fee3ed82a011184807d2127f1733b4f6b2ff6ec7151d83ef3477f3b96a13d03", + "sha256:30ab804ea73972049b7a2a5c62d97687d69b5a60a67adca07eb73a0ddbc9e29f", + "sha256:3598893bde43091ee5ca0a6ad20f08a0435e93a69255eeb5f81b85e81e329264", + "sha256:3b5df18db1fccd66de15aa59c41e4f853b5df7550723d26aa6cb7f40e5d9da5a", + "sha256:3c5fb32cc6077abad3bbf0323067636d93307c9fa93e072771cf9a64d1c0f3ef", + "sha256:416c5f1a188c91e3eb41e9c8787288e707f7d2ebe66e0a6563af280d9b68478f", + "sha256:42b50fa6666b0d50c30a990527127334d6b96dd969011e843e726a64011485da", + "sha256:432bd15d40ed835a51617521d60d0125867f7b88acf653e4ed994a1f8e4995dc", + "sha256:473e67837f786404570eae33c3b64a4b9635ae9f00145250851a1292f484c063", + "sha256:4aaa4e0705ef2b73dd8e36eeb4c868f80f8393f5f4d855e94025ce7ad8525f50", + "sha256:50a7ddf3d131dc5633dccdb51417e2d1910d25cbcf842115a3a5893509140a3a", + "sha256:529801a0d58809b60b3531ee804d3e3be4b412c94b5d267daa3de7fadef00f49", + "sha256:537ca6a3586931b16a85ac38c08cc48f10fc870a5b25e51794c74df843e9966d", + "sha256:53db2c6be8a2710b359bfd3d3aa17ba38f8aa72a82309a12ae99d3c0c3dcd74d", + "sha256:5537f71b6d646f7f5f340562ec4c77b6e1c915f8baae822ea0b7e46c1f09b733", + "sha256:563d5f9354e15e048465061509403f68424fef37d5add3064038c2511c8f5e00", + "sha256:5d408a642a5484b9b4d11dea15a489ea0928c7e410c7525cd892f4d04f2f617b", + "sha256:61600a7ca4bcf78a96a68a27c2ae9389763b5b94b63943d5158f2a377e09d29a", + "sha256:6650f16365f1924d6014d2ea770bde8555b4a39dc9576abb95e3cd1ff0263b36", + "sha256:666abff54e474d28ff42756d94544cdfd42e2ee97065857413b72e8a2d6a6345", + "sha256:68a067c11463de2a37157930d8b153005085e42bcb7ad9ca562d77ba7d1404e0", + "sha256:6e1d2cc79e8dae442b3fa4a26c5794428b98f81389af90623ffcc650ce9f6732", + "sha256:74cbeac0451f27d4f50e6e8a8f3a52ca074b5e2da9f7b505c4201a57a8ed6286", + "sha256:780b48456a0f0ba4d390e8b5f7c661fdd218934388cde1a974010a965e200e12", + "sha256:788aef3549f1924d5c38263104dae7395bf020a42776d5ec5ea2b0d3d85d6646", + "sha256:7ee1227cf08b6716c85504aebc49ac827eb88fcc6e51564f010f11a406c0a667", + "sha256:7f301b11b9d214f83ddaf689181051e7f48905568b0c7017c04c06dfd065e244", + "sha256:83ee89483672b11f8952b158640d0c0ff02dc43d9cb1b70c1564b49abe92ce29", + "sha256:85bfa6a5413be0ee6c5c4a663668a2cad2cbecdee367630d097d7823041bdeec", + "sha256:9345b6f7ee578bad8e475129ed40123d265464c4cfead6c261fd60fc9de00bcf", + "sha256:93a5051fcf5fad72de73b96f07d30bc29665697fb8ecdfbc474f3452c78adcf4", + "sha256:962b9a917dd7ceacbe5cd424556914cb0d636001e393b43dc886ba31d2a1e449", + "sha256:96fc32c16ea6d60d3ca7f63397bff5c75c5a562f7db6dec7d412f7c4d2e78ec0", + "sha256:98ba568e8ae26beb726aeea2273053c717641933836568c2a0278a84987b2a1a", + "sha256:a3feefd5e95871872673b08636f96b61ebef62971eab044f5124fb4dea39919d", + "sha256:a955b747d620a50408b7fdf948e04359d6e762ff8a85f5775d907ceced715129", + "sha256:b43c2b8a330a490daaef5a47ab114935002b13b3f9dc5da56d5322ff218eeadb", + "sha256:b483c9d00a565633c87abd0aaf27eb5016de23fed952e054ecc19ce32f6a9e7e", + "sha256:b9ed0b1e5e0759d6b7f8e2f143894b2a7f3edd313f38cf44e1e15d360e11749b", + "sha256:ba05430e819e58544e840a68b03b28b6d328aff2e41579037e8bab7653b37d83", + "sha256:ca49e1ab99593438b204e00f3970e7a5f70d045267051dfa6b5f4304fcfa1dbf", + "sha256:ca5f18a75e1256ce07494e245cdb146f5a9267d3c702ebf9b65c7f8bd843431e", + 
"sha256:cd410a1cbb2d297c67d8521759ab2ee3f1d66206d2e4328502a487589a2cb21b", + "sha256:ce298e3d0c65bd03fa65ffcc6db0e2b578e8f626d468db64fdf8457731052942", + "sha256:d5ca078bb666c4a9d1287a379fe617a6dccd18c3e8a7e6c7e1eb8974330c626a", + "sha256:d5fd67df77bab0d3f4ea1d7afca9ef15c2ee35dfb348c7b57ffb9782a6e4db6e", + "sha256:da1a90c1ddb7531b1d5ff1e171b4ee61f6345119be7351104b67ff413843fe94", + "sha256:dba70f30fd81f8ce6d32ddeef37d91c8948e5d5a4c63242d16a2b2df8143aafc", + "sha256:dc07f021ee80510f3cd3af2cad5b6a3b3a10b057521d9e6aaeb621730d320c5a", + "sha256:dd33eb9bdcfbabab3459c9ee651d94c842bc8a05fabc95edf4ee0c15a072495e", + "sha256:e0538c43565ee6e703d3a7c3bdfe4037a5209250e8502c98f20fea6f5fdf2965", + "sha256:e1f54b9b4b6c53369f40028d2dd07a8c374583417ee6ec0ea304e710a20f80a0", + "sha256:e32d2a2b02ccbef10145df9135751abea1f9f076e67a4e261b05f24b94219e36", + "sha256:e6096b0688e6e14af6a1b10eaad86b4ff17935c49aa774eac7c95a57a4e8c296", + "sha256:e71255ba42567d34a13c03968736c5d39bb4a97ce98188fafb27ce981115beec", + "sha256:ed2e07c6a26ed4bea91b897ee2b0835c21716d9a469a96c3e878dc5f8c55bb23", + "sha256:eef2afb0fd1747f33f1ee3e209bce1ed582d1896b240ccc5e2697e3275f037c7", + "sha256:f23222527b307970e383433daec128d769ff778d9b29343fb3496472dc20dabe", + "sha256:f341ee2df0999bfdf7a95e448075effe0db212a59387de1a70690e4acb03d4c6", + "sha256:f5be7805e53dafe94d295399cfbe5227f39995a997f4fd8539bf3cbdc8f47ca8", + "sha256:f7f325be2804246a75a4f45c72d4ce80d2443ab815063cdf70ee8fb2ca59ee1b", + "sha256:f8af619e3be812a2059b212064ea7a640aff0568d972cd1b9e920837469eb3cb", + "sha256:fa8c626d6441e2d04b6ee703ef2d1e17608ad44c7cb75258c09dd42bacdfc64b", + "sha256:fbb9dc00e39f3e6c0ef48edee202f9520dafb233e8b51b06b8428cfcb92abd30", + "sha256:fff55f3ce50a3ff63ec8e2a8d3dd924f1941b250b0aac3d3d42b687eeff07a8e" + ], + "version": "==2021.11.10" + }, + "requests": { + "hashes": [ + "sha256:6c1246513ecd5ecd4528a0906f910e8f0f9c6b8ec72030dc9fd154dc1a6efd24", + "sha256:b8aa58f8cf793ffd8782d3d8cb19e66ef36f7aba4353eec859e74678b01b07a7" + ], + "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4, 3.5'", + "version": "==2.26.0" + }, + "six": { + "hashes": [ + "sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926", + "sha256:8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254" + ], + "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'", + "version": "==1.16.0" + }, + "snowballstemmer": { + "hashes": [ + "sha256:09b16deb8547d3412ad7b590689584cd0fe25ec8db3be37788be3810cbf19cb1", + "sha256:c8e1716e83cc398ae16824e5572ae04e0d9fc2c6b985fb0f900f5f0c96ecba1a" + ], + "version": "==2.2.0" + }, + "sphinx": { + "hashes": [ + "sha256:048dac56039a5713f47a554589dc98a442b39226a2b9ed7f82797fcb2fe9253f", + "sha256:32a5b3e9a1b176cc25ed048557d4d3d01af635e6b76c5bc7a43b0a34447fbd45" + ], + "markers": "python_version >= '3.6'", + "version": "==4.3.1" + }, + "sphinxcontrib-applehelp": { + "hashes": [ + "sha256:806111e5e962be97c29ec4c1e7fe277bfd19e9652fb1a4392105b43e01af885a", + "sha256:a072735ec80e7675e3f432fcae8610ecf509c5f1869d17e2eecff44389cdbc58" + ], + "markers": "python_version >= '3.5'", + "version": "==1.0.2" + }, + "sphinxcontrib-devhelp": { + "hashes": [ + "sha256:8165223f9a335cc1af7ffe1ed31d2871f325254c0423bc0c4c7cd1c1e4734a2e", + "sha256:ff7f1afa7b9642e7060379360a67e9c41e8f3121f2ce9164266f61b9f4b338e4" + ], + "markers": "python_version >= '3.5'", + "version": "==1.0.2" + }, + "sphinxcontrib-htmlhelp": { + "hashes": [ + 
"sha256:d412243dfb797ae3ec2b59eca0e52dac12e75a241bf0e4eb861e450d06c6ed07", + "sha256:f5f8bb2d0d629f398bf47d0d69c07bc13b65f75a81ad9e2f71a63d4b7a2f6db2" + ], + "markers": "python_version >= '3.6'", + "version": "==2.0.0" + }, + "sphinxcontrib-jsmath": { + "hashes": [ + "sha256:2ec2eaebfb78f3f2078e73666b1415417a116cc848b72e5172e596c871103178", + "sha256:a9925e4a4587247ed2191a22df5f6970656cb8ca2bd6284309578f2153e0c4b8" + ], + "markers": "python_version >= '3.5'", + "version": "==1.0.1" + }, + "sphinxcontrib-qthelp": { + "hashes": [ + "sha256:4c33767ee058b70dba89a6fc5c1892c0d57a54be67ddd3e7875a18d14cba5a72", + "sha256:bd9fc24bcb748a8d51fd4ecaade681350aa63009a347a8c14e637895444dfab6" + ], + "markers": "python_version >= '3.5'", + "version": "==1.0.3" + }, + "sphinxcontrib-serializinghtml": { + "hashes": [ + "sha256:352a9a00ae864471d3a7ead8d7d79f5fc0b57e8b3f95e9867eb9eb28999b92fd", + "sha256:aa5f6de5dfdf809ef505c4895e51ef5c9eac17d0f287933eb49ec495280b6952" + ], + "markers": "python_version >= '3.5'", + "version": "==1.1.5" + }, + "tabulate": { + "hashes": [ + "sha256:d7c013fe7abbc5e491394e10fa845f8f32fe54f8dc60c6622c6cf482d25d47e4", + "sha256:eb1d13f25760052e8931f2ef80aaf6045a6cceb47514db8beab24cded16f13a7" + ], + "version": "==0.8.9" + }, + "toml": { + "hashes": [ + "sha256:806143ae5bfb6a3c6e736a764057db0e6a0e05e338b5630894a5f779cabb4f9b", + "sha256:b3bda1d108d5dd99f4a20d24d9c348e91c4db7ab1b749200bded2f839ccbe68f" + ], + "markers": "python_version >= '2.6' and python_version not in '3.0, 3.1, 3.2, 3.3'", + "version": "==0.10.2" + }, + "tomli": { + "hashes": [ + "sha256:c6ce0015eb38820eaf32b5db832dbc26deb3dd427bd5f6556cf0acac2c214fee", + "sha256:f04066f68f5554911363063a30b108d2b5a5b1a010aa8b6132af78489fe3aade" + ], + "markers": "python_version >= '3.6'", + "version": "==1.2.2" + }, + "types-pytz": { + "hashes": [ + "sha256:d58a0688094b768d8e21c044e45861cbcaecba0494fd5b9c5feb3e1739211606", + "sha256:dffd77f3efecd3b1555f187a9bf3a638d55fac296700b829c41bd51ec72a6eb7" + ], + "index": "pypi", + "version": "==2021.3.1" + }, + "types-pyyaml": { + "hashes": [ + "sha256:2e27b0118ca4248a646101c5c318dc02e4ca2866d6bc42e84045dbb851555a76", + "sha256:d5b318269652e809b5c30a5fe666c50159ab80bfd41cd6bafe655bf20b29fcba" + ], + "index": "pypi", + "version": "==6.0.1" + }, + "typing-extensions": { + "hashes": [ + "sha256:2cdf80e4e04866a9b3689a51869016d36db0814d84b8d8a568d22781d45d27ed", + "sha256:829704698b22e13ec9eaf959122315eabb370b0884400e9818334d8b677023d9" + ], + "markers": "python_version >= '3.6'", + "version": "==4.0.0" + }, + "typing-inspect": { + "hashes": [ + "sha256:047d4097d9b17f46531bf6f014356111a1b6fb821a24fe7ac909853ca2a782aa", + "sha256:3cd7d4563e997719a710a3bfe7ffb544c6b72069b6812a02e9b414a8fa3aaa6b", + "sha256:b1f56c0783ef0f25fb064a01be6e5407e54cf4a4bf4f3ba3fe51e0bd6dcea9e5" + ], + "version": "==0.7.1" + }, + "urllib3": { + "hashes": [ + "sha256:4987c65554f7a2dbf30c18fd48778ef124af6fab771a377103da0585e2336ece", + "sha256:c4fdf4019605b6e5423637e01bc9fe4daef873709a7973e195ceba0a62bbc844" + ], + "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4' and python_version < '4'", + "version": "==1.26.7" + }, + "virtualenv": { + "hashes": [ + "sha256:4b02e52a624336eece99c96e3ab7111f469c24ba226a53ec474e8e787b365814", + "sha256:576d05b46eace16a9c348085f7d0dc8ef28713a2cabaa1cf0aea41e8f12c9218" + ], + "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4'", + "version": "==20.10.0" + } + } +} diff --git a/README.md b/README.md new file mode 
100644 index 0000000..f1a0a0c --- /dev/null +++ b/README.md @@ -0,0 +1,13 @@ +atextcrawler is an asynchronous webcrawler indexing text for literal and semantic search. + +Its client-side counterpart is [atextsearch](https://gitea.multiname.org/a-text/atextsearch) + +atextcrawler crawls and indexes selected websites. +It starts from a few seed sites and follows their external links. +Criteria defined in plugin code determine which linked sites (and +which of their resources) are (recursively) added to the pool. + +atextcrawler is written in Python, runs a configurable number of +async workers concurrently (in one process), uses tensorflow for +embedding (paragraph-sized) text chunks in a (multi-)language model +and stores metadata in PostgreSQL and texts in elasticsearch. diff --git a/doc/Makefile b/doc/Makefile new file mode 100644 index 0000000..d0c3cbf --- /dev/null +++ b/doc/Makefile @@ -0,0 +1,20 @@ +# Minimal makefile for Sphinx documentation +# + +# You can set these variables from the command line, and also +# from the environment for the first two. +SPHINXOPTS ?= +SPHINXBUILD ?= sphinx-build +SOURCEDIR = source +BUILDDIR = build + +# Put it first so that "make" without argument is like "make help". +help: + @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) + +.PHONY: help Makefile + +# Catch-all target: route all unknown targets to Sphinx using the new +# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). +%: Makefile + @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) diff --git a/doc/source/conf.py b/doc/source/conf.py new file mode 100644 index 0000000..db3c026 --- /dev/null +++ b/doc/source/conf.py @@ -0,0 +1,71 @@ +# Configuration file for the Sphinx documentation builder. +# +# This file only contains a selection of the most common options. For a full +# list see the documentation: +# https://www.sphinx-doc.org/en/master/usage/configuration.html + +# -- Path setup -------------------------------------------------------------- + +# If extensions (or modules to document with autodoc) are in another directory, +# add these directories to sys.path here. If the directory is relative to the +# documentation root, use os.path.abspath to make it absolute, like shown here. +# +# import os +# import sys +# sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath('.')))) +import os +import sys + +proj_dir = os.path.dirname(os.path.dirname(os.path.abspath('.'))) +sys.path.insert(0, proj_dir + '/src') + + +# -- Project information ----------------------------------------------------- + +project = 'atextcrawler' +copyright = '2021, ibu radempa' +author = 'ibu radempa' + +# The full version, including alpha/beta/rc tags +release = '0.1.0' + + +# -- General configuration --------------------------------------------------- + +# Add any Sphinx extension module names here, as strings. They can be +# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom +# ones. +extensions = [ + 'sphinx.ext.autodoc', + 'sphinx.ext.autosummary', + 'myst_parser', + 'sphinx.ext.graphviz', +] + +# Add any paths that contain templates here, relative to this directory. +templates_path = ['_templates'] + +# List of patterns, relative to source directory, that match files and +# directories to ignore when looking for source files. +# This pattern also affects html_static_path and html_extra_path. 
+exclude_patterns = []
+
+
+# -- Options for HTML output -------------------------------------------------
+
+# The theme to use for HTML and HTML Help pages.  See the documentation for
+# a list of builtin themes.
+#
+html_theme = 'sphinx_rtd_theme'
+
+# Add any paths that contain custom static files (such as style sheets) here,
+# relative to this directory. They are copied after the builtin static files,
+# so a file named "default.css" will overwrite the builtin "default.css".
+html_static_path = ['_static']
+
+autosummary_generate = True
+
+source_suffix = {
+    '.rst': 'restructuredtext',
+    '.md': 'markdown',
+}
diff --git a/doc/source/config_template/initial_data/seed_urls.list b/doc/source/config_template/initial_data/seed_urls.list
new file mode 100644
index 0000000..7dc21ea
--- /dev/null
+++ b/doc/source/config_template/initial_data/seed_urls.list
@@ -0,0 +1,23 @@
+# Initial URLs (first run only)
+#
+# To whitelist a URL prepend '+', to blacklist prepend '-'.
+# Comment lines must begin with '#'.
+
+# de
++http://agd.blogsport.de/
++https://blackblogs.org/blogs/
++https://fau.org/
++http://anarchiv.de/
++http://olaf.bbm.de/die-aktion
+-https://www.anarchistischefoderation.de/
+
+# en
++https://anarchistarchivist.com/
++https://bookshelf.theanarchistlibrary.org/library/
++https://archive.elephanteditions.net/library/
++https://blackrosefed.org/
++https://alpineanarchist.org/
++https://nostate.net/
++https://abolishing.blackblogs.org/
++http://library.nothingness.org/
+-https://www.anarchistfederation.net/
diff --git a/doc/source/config_template/main.yaml b/doc/source/config_template/main.yaml
new file mode 100644
index 0000000..8a12feb
--- /dev/null
+++ b/doc/source/config_template/main.yaml
@@ -0,0 +1,88 @@
+# Name of this instance
+# Default value: atextcrawler
+# Allowed values: arbitrary string
+instance_name: atextcrawler
+
+# Which kind of instance is this?
+# Default value: prod
+# Allowed values are:
+# - 'dev': development instance
+# - 'staging': staging instance
+# - 'prod': production instance
+instance_type: prod
+
+# Log level
+# Default value: info
+# Allowed values: critical, error, warning, info, debug
+log_level: info
+
+# Plugins directory
+# If given as a relative path, it will be relative to the
+# directory of this file (main.yaml).
+# Read the documentation on plugins.
+# Default value: plugins
+# Hint: Create an empty __init__.py in the plugins_dir.
+plugins_dir: plugins
+
+# Parameters for access to the PostgreSQL service
+# No default values; must be set.
+postgresql:
+  host: localhost
+  port: 5432
+  database: atextcrawler
+  user: atextcrawler
+  password: ________________________
+
+# Crawling
+crawl:
+  # Number of concurrent workers
+  # Default value: 10
+  # Allowed values: integer >=0 and <=1000
+  #workers: 3
+
+  # Delay in seconds between attempts to fetch items
+  # from site_queue if the last attempt gave no item;
+  # also the delay in seconds after a worker has found
+  # no site to process
+  # Default value: 600
+  # Allowed values: positive number
+  #site_delay: 10
+
+  # Time interval in seconds between site updates when
+  # handling queued base URLs
+  # Default value: 3600
+  # Allowed values: positive number
+  #site_revisit_interval: 3600
+
+  # Delay in seconds between attempts to process
+  # individual resources (pages etc.)
of a site + # Default value: 5 + # Allowed values: positive number + #resource_delay: 3 + + # Default interval in seconds between full crawls of a site + # Default value: 864000 (10 days) + # Allowed values: positive number + #full_crawl_interval: 864000 + + # Default interval in seconds between feed crawls of a site + # Default value: 86400 (1 day) + # Allowed values: positive number + #feed_crawl_interval: 86400 + +# Parameters for access to the ElasticSearch service +# No default values; must be set. +elasticsearch: + # host on which ES is running + host: localhost + # API key for accessing ES + api_key: "**********************" + # API user id + id: "**********************" + # Index base name (actual index names will have '_text' etc. appended) + index_base_name: atext + +# Tensorflow access +tensorflow: + # The prediction endpoint of the model server's sentence model + model_server_endpoint: http://localhost:9000/v1/models/sentences:predict diff --git a/doc/source/config_template/plugins/__init__.py b/doc/source/config_template/plugins/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/doc/source/config_template/plugins/filter_resource_path.py b/doc/source/config_template/plugins/filter_resource_path.py new file mode 100644 index 0000000..cb84e22 --- /dev/null +++ b/doc/source/config_template/plugins/filter_resource_path.py @@ -0,0 +1,22 @@ +""" +Filter paths found in a resource. + +This plugin implements :func:`rp_filter`. +""" + +from typing import Optional + + +def rp_filter(site, durl) -> Optional[str]: + """ + Adjust or filter found paths (may depend on site). + + To filter out a path (i.e., not add it to table `site_path`) + return None. + """ + path = durl.pwa() + # skip fetching images (linked from a tags; img tags are skipped anyway) + if path.lower().endswith('.jpg') or path.lower().endswith('.png'): + return None + path = path.removesuffix('?amp=1') + return path diff --git a/doc/source/config_template/plugins/filter_site.py b/doc/source/config_template/plugins/filter_site.py new file mode 100644 index 0000000..09b2282 --- /dev/null +++ b/doc/source/config_template/plugins/filter_site.py @@ -0,0 +1,47 @@ +""" +Relevance estimation of sites. + +This plugin implements :func:`site_filter`. +""" + +import re + +from atextcrawler.models import Site + +MIN_RELEVANCE_SCORE = 5 + + +async def site_filter(site: Site) -> bool: + """ + Assess relevance of the site (using language-dependent criteria). + + If the site shall be crawled, return True, else False. + """ + # limit to sites in English or German language + if not set(['de', 'en']) & set(site.langs): + return False + score = 0.0 + for crit_name, weight, langs, crit_re in re_criteria: + if '*' in langs or set(langs) & set(site.langs): + findings = crit_re.findall(site.startpage_text) + if findings: + score += weight * len(findings) + if site.title and crit_re.search(site.title): + score += 4 * weight + if site.description and crit_re.search(site.description): + score += 4 * weight + + # TODO: add criteria for named entities (FdA-IFA, FAU, ...) + + return score >= MIN_RELEVANCE_SCORE + + +re_criteria = { + ( + 'anarch', + 1.0, + ('*',), + re.compile('((? bool: + """ + Per-site path filter. Return whether the path shall be retrieved. + """ + if not robots.can_fetch_url(site.base_url + path): + return False + if 'amusewiki' in site.meta_info.get('generator', '').lower(): + if any( + [ + path.endswith(end) + for end in ('.html', '.epub', '.tex', '.zip', '.pdf') + ] + ): + return False + if '/bbselect?' 
in path:
+        return False
+    return True
diff --git a/doc/source/devel/devel.md b/doc/source/devel/devel.md
new file mode 100644
index 0000000..18ce86b
--- /dev/null
+++ b/doc/source/devel/devel.md
@@ -0,0 +1,63 @@
+## Setup dev environment
+1. You need python 3.9 or later.
+1. Have pipenv installed, e.g.: install pip3 (`apt install python3-pip`), then run `pip3 install --user pipenv`.
+1. Clone the repo and set up a virtualenv:
+```
+cd YOUR_DEV_DIR
+git clone ssh://gitea@gitea-ssh.multiname.org:20106/a-text/atextcrawler.git
+cd atextcrawler
+pipenv install -d
+```
+
+## Configure the instance
+See [installation](installation.md).
+
+## Run
+```
+python -m atextcrawler
+```
+
+## Logging
+Use the configured instance_name (e.g. `atextcrawler_dev`) to select journal messages:
+```
+journalctl -ef SYSLOG_IDENTIFIER=atextcrawler_dev
+```
+
+## Upgrading
+Upgrade dev tools:
+```
+pre-commit autoupdate
+```
+
+## Test and clean manually
+```
+AIOPGQ_POSTGRESQL="host=127.0.0.1 port=5432 database=atextcrawler-dev user=atextcrawler-dev password=*************" python -W ignore -m unittest discover
+mypy --ignore-missing-imports src/atextcrawler
+isort src/atextcrawler
+black -S -t py37 -l 79 src/atextcrawler
+pybetter --exclude B004,B007,B008 src/atextcrawler
+interrogate -i -I -m -v src/atextcrawler
+```
+
+## Release
+There are no releases (currently).
+
+## Useful commands
+
+### Fetch a resource or a site manually
+```
+python -m atextcrawler.resource https://www.katesharpleylibrary.net/
+python -m atextcrawler.site https://www.katesharpleylibrary.net/
+```
+
+### SQL
+```
+drop table crawl; drop table site_path; drop table resource; drop table site cascade; drop table site_feed; drop table site_link; drop table site_queue; drop table kvs;
+
+http -j --auth elastic:*********************** -j DELETE http://127.0.0.1:9200/anarchism_text_*
+
+http -j --auth elastic:*********************** -j GET http://127.0.0.1:9200/_cat/indices
+
+-- stats: sites, paths, resources
+select s.id site_id, s.base_url, spr.n_paths, spr.n_resources, spr.n_chars from site s left join (select sp.site_id, count(sp.path) n_paths, count(r.id) n_resources, sum(r.text_len) n_chars from site_path sp left join resource r on sp.resource_id=r.id group by sp.site_id) spr on spr.site_id=s.id where s.relevant order by s.id;
+```
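+
+The SQL statements above can also be run non-interactively with psql.
+A suggested invocation (not part of this setup so far), assuming the dev
+credentials from the test command above; adjust host, port and names:
+```
+# run a single statement against the dev database
+PGPASSWORD=************* psql -h 127.0.0.1 -p 5432 -U atextcrawler-dev -d atextcrawler-dev -c 'select count(*) from site;'
+```
diff --git a/doc/source/devel/related_work.md b/doc/source/devel/related_work.md
new file mode 100644
index 0000000..5ecb99d
--- /dev/null
+++ b/doc/source/devel/related_work.md
@@ -0,0 +1,64 @@
+## Related work
+* [collection of crawlers](https://github.com/adbar/awesome-crawler)
+* [collection of webscrapers](https://github.com/adbar/awesome-web-scraper)
+
+### crawlers
+* [acrawler](https://acrawler.readthedocs.io/en/latest/)
+* [trafilatura](https://trafilatura.readthedocs.io/en/latest/index.html)
+  * [repo](https://github.com/adbar/trafilatura)
+  * [intro](https://adrien.barbaresi.eu/blog/trafilatura-main-text-content-python.html)
+* [aiohttp_spider](https://github.com/niklak/aiohttp_spider/)
+* [scrapy](https://docs.scrapy.org/en/latest/)
+* [heritrix3](https://github.com/internetarchive/heritrix3/)
+* [YaCy](https://yacy.net/)
+* [searchmysite](https://searchmysite.net/)
+* [spiderling](http://corpus.tools/raw-attachment/wiki/Downloads/spiderling-src-0.84.tar.xz)
+* https://github.com/riteshnaik/Crawling-and-Deduplication-of-Polar-Datasets-Using-Nutch-and-Tika
+* [edge search 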
engine](https://memex.marginalia.nu/projects/edge/about.gmi) + +#### general +* [elastic enterprise search](https://www.elastic.co/blog/building-a-scalable-easy-to-use-web-crawler-for-elastic-enterprise-search) + +### sitemap parsers +* [ultimate-sitemap-parser](https://github.com/mediacloud/ultimate-sitemap-parser) + +### url handling +* [courlan](https://pypi.org/project/courlan/) + +### language detection +* [overview](https://stackoverflow.com/questions/39142778/python-how-to-determine-the-language) +* [guess_language-spirit](https://pypi.org/project/guess_language-spirit/) +* [guess_language](https://pypi.org/project/guess-language/) +* [cld3](https://github.com/google/cld3) + +### text extraction +* [JusText](http://corpus.tools/wiki/Justext_changelog) [demo](https://nlp.fi.muni.cz/projects/justext/) + +### deduplication +* [PostgreSQL extension smlar](https://github.com/jirutka/smlar) +* [use smlar](https://medium.datadriveninvestor.com/the-smlar-plug-in-for-effective-retrieval-of-massive-volumes-of-simhash-data-e429c19da1a3) +* remove paragraphs with more than 50% word-7-tuples encountered previously + +### Extract more meta tags +* https://github.com/shareaholic/shareaholic-api-docs/blob/master/shareaholic_meta_tags.md + https://support.shareaholic.com/hc/en-us/articles/115003085186 + +### Date parsing dependent on language +* https://en.wikipedia.org/wiki/Date_format_by_country +* https://en.wikipedia.org/wiki/Common_Locale_Data_Repository +* https://pypi.org/project/dateparser/ +* https://github.com/ovalhub/pyicu +* https://github.com/night-crawler/cldr-language-helpers +* https://stackoverflow.com/questions/19927654/using-dateutil-parser-to-parse-a-date-in-another-language + +ICU +* https://unicode-org.github.io/icu/userguide/format_parse/datetime/examples.html#parse +* https://gist.github.com/dpk/8325992 +* https://unicode-org.github.io/icu-docs/apidoc/released/icu4c/classicu_1_1DateFormat.html +* https://unicode-org.github.io/icu/userguide/ +* https://unicode-org.github.io/icu-docs/#/icu4c/ +* https://github.com/ovalhub/pyicu/blob/master/samples/break.py +* https://www.unicode.org/reports/tr35/tr35-dates.html#Date_Field_Symbol_Table +* https://www.unicode.org/reports/tr35/tr35-dates.html#months_days_quarters_eras +* https://unicode-org.github.io/icu/userguide/format_parse/datetime/#formatting-dates-and-times-overview diff --git a/doc/source/devel/todo.md b/doc/source/devel/todo.md new file mode 100644 index 0000000..35e8065 --- /dev/null +++ b/doc/source/devel/todo.md @@ -0,0 +1,77 @@ +## TODO + +* parse html time tags + +* site annotations: + * categories + * historical (no changes any more since n months) + * news + * local focus - geonames: http://download.geonames.org/export/dump/cities15000.zip + +* allow for tls in elasticsearch config + +* replace dashes, dots and quotes: https://github.com/kovidgoyal/calibre/blob/3dd95981398777f3c958e733209f3583e783b98c/src/calibre/utils/unsmarten.py +``` + '–': '--', + '–': '--', + '–': '--', + '—': '---', + '—': '---', + '—': '---', + '…': '...', + '…': '...', + '…': '...', + '“': '"', + '”': '"', + '„': '"', + '″': '"', + '“': '"', + '”': '"', + '„': '"', + '″': '"', + '“':'"', + '”':'"', + '„':'"', + '″':'"', + '‘':"'", + '’':"'", + '′':"'", + '‘':"'", + '’':"'", + '′':"'", + '‘':"'", + '’':"'", + '′':"'", +``` +* normalize quotation marks and punctuation in general + * https://unicode-table.com/en/sets/quotation-marks/ + * https://github.com/avian2/unidecode/blob/master/unidecode/x020.py + * 
https://www.fileformat.info/info/unicode/category/Po/list.htm
+  https://www.gaijin.at/en/infos/unicode-character-table-punctuation
+  ⁝
+
+* cancel crawls that take too long
+
+* search for "TODO" in code
+
+* feedparser has support for JSON feeds since commit
+  a5939702b1fd0ec75d2b586255ff0e29e5a8a6fc
+  (as of 2020-10-26 in "develop" branch, not part of a release)
+  the version names are 'json1' and 'json11'
+
+* allow site URLs with path, e.g.
+  https://web.archive.org/web/20090320055457/http://www.geocities.com/kk_abacus/
+
+* add more languages
+
+## Ideas
+* use [python-libzim](https://github.com/openzim/python-libzim) to create ZIM archives
+
+* [spacy-langdetect](https://pypi.org/project/spacy-langdetect/)
+* [langid.py](https://github.com/saffsd/langid.py)
+
+* [gain](https://github.com/gaojiuli/gain)
+* [ruia](https://docs.python-ruia.org/)
+* [demiurge](https://demiurge.readthedocs.io/)
+* [cocrawler](https://github.com/cocrawler/cocrawler/)
+* [aiocrawler](https://github.com/tapanpandita/aiocrawler/)
diff --git a/doc/source/development.rst b/doc/source/development.rst
new file mode 100644
index 0000000..befa427
--- /dev/null
+++ b/doc/source/development.rst
@@ -0,0 +1,9 @@
+Development
+-----------
+
+.. toctree::
+   :maxdepth: 2
+
+   devel/devel
+   devel/todo
+   devel/related_work
diff --git a/doc/source/elasticsearch.md b/doc/source/elasticsearch.md
new file mode 100644
index 0000000..7ccae9d
--- /dev/null
+++ b/doc/source/elasticsearch.md
@@ -0,0 +1,119 @@
+# Howto elasticsearch
+
+## Prerequisites
+On the host (virtualization host) we need:
+```
+# cat /etc/sysctl.d/virtual_memory.conf
+vm.max_map_count=262144
+# sysctl -p /etc/sysctl.d/virtual_memory.conf
+```
+
+If this cannot be done, change this file after installing or upgrading elasticsearch:
+```
+/usr/lib/sysctl.d/elasticsearch.conf
+```
+
+## Setup
+
+### Install package
+In general, see the [elasticsearch reference](https://www.elastic.co/guide/en/elasticsearch/reference/7.10/deb.html).
+
+We do a manual install. If you configure the apt repo instead, also think about setting
+`RESTART_ON_UPGRADE=true` in `/etc/default/elasticsearch`.
+
+```
+wget https://artifacts.elastic.co/downloads/elasticsearch/elasticsearch-7.15.2-amd64.deb
+wget https://artifacts.elastic.co/downloads/elasticsearch/elasticsearch-7.15.2-amd64.deb.sha512
+shasum -a 512 -c elasticsearch-7.15.2-amd64.deb.sha512
+dpkg -i elasticsearch-7.15.2-amd64.deb
+systemctl daemon-reload
+systemctl enable elasticsearch.service
+systemctl start elasticsearch.service
+```
+
+First test:
+```
+http -j GET 127.0.0.1:9200/
+```
+
+### Storage
+
+```
+systemctl stop elasticsearch.service
+mv /var/lib/elasticsearch/ /srv/
+systemctl start elasticsearch.service
+```
+
+Edit /etc/elasticsearch/elasticsearch.yml
+```
+cluster.name: org.a-text.search
+node.name: atext1
+path.data: /srv/elasticsearch
+path.logs: /var/log/elasticsearch
+discovery.seed_hosts: ["atext1.multiname.org"]
+xpack.security.enabled: true
+xpack.security.authc.api_key.enabled: true
+```
+
+```
+systemctl restart elasticsearch
+```
+
+The logfile is now at
+```
+/var/log/elasticsearch/org.a-text.search.log
+```
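+
+Once passwords are set up (see the next section), an API key for
+atextcrawler can be created. A possible way (not part of this setup so
+far; the `id` and `api_key` from the response belong in the
+`elasticsearch` section of main.yaml):
+```
+http --auth elastic:************** -j POST http://127.0.0.1:9200/_security/api_key name=atextcrawler
+```
+
+### Setup passwords
+Set up passwords:
+```
+# /usr/share/elasticsearch/bin/elasticsearch-setup-passwords auto
+Initiating the setup of passwords for reserved users elastic,apm_system,kibana,kibana_system,logstash_system,beats_system,remote_monitoring_user.
+The passwords will be randomly generated and printed to the console. 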
+Please confirm that you would like to continue [y/N]y +``` + +Copy output to /etc/elasticsearch/passwords and +``` +chmod 400 /etc/elasticsearch/passwords +``` + +Check login as user elastic: +``` +http --auth elastic:************** -j GET http://127.0.0.1:9200/ +``` + +### Memory limitation +To limit memory usage +``` +mkdir /etc/systemd/system/elasticsearch.service.d +cat >/etc/systemd/system/elasticsearch.service.d/override.conf <>.bashrc <>.profile < 0 and self.running: + await asyncio.sleep(min(t_slice, remaining)) + remaining -= t_slice + + +async def reset_site_locks(pool): + """ + Remove locks leftover from last run: Set crawl_active=false for all sites. + + This is relevant when the application was not shutdown properly (e.g. + when the process was killed). + """ + async with pool.acquire() as conn: + sql = "UPDATE site SET crawl_active = false WHERE crawl_active = true" + await conn.execute(sql) diff --git a/src/atextcrawler/assets/iana_langs b/src/atextcrawler/assets/iana_langs new file mode 100644 index 0000000..33687b8 --- /dev/null +++ b/src/atextcrawler/assets/iana_langs @@ -0,0 +1,7 @@ +The recommended language tags to use in webpages are from +the IANA Language Subtag Registry (BCP47), see: +https://www.w3.org/International/questions/qa-html-language-declarations +https://r12a.github.io/app-subtags/ + + +wget -O- https://www.iana.org/assignments/language-subtag-registry/language-subtag-registry | rg '^Subtag: |^Tag: ' atextcrawler/assets/iana_langs_ | sed -e 's/^Subtag: //' | sed -e 's/^Tag: //' diff --git a/src/atextcrawler/assets/iso_639-1 b/src/atextcrawler/assets/iso_639-1 new file mode 100644 index 0000000..373b69a --- /dev/null +++ b/src/atextcrawler/assets/iso_639-1 @@ -0,0 +1,219 @@ +aa +ab +ae +af +ak +am +an +ar +as +av +ay +az +ba +be +bg +bh +bi +bm +bn +bo +br +bs +ca +ca +ce +ch +co +cr +cs +cu +cu +cu +cu +cu +cv +cy +da +de +dv +dv +dv +dz +ee +el +en +eo +es +es +et +eu +fa +ff +fi +fj +fo +fr +fy +ga +gd +gd +gl +gn +gu +gv +ha +he +hi +ho +hr +ht +ht +hu +hy +hz +ia +id +ie +ie +ig +ii +ii +ik +io +is +it +iu +ja +jv +ka +kg +ki +ki +kj +kj +kk +kl +kl +km +kn +ko +kr +ks +ku +kv +kw +ky +ky +la +lb +lb +lg +li +li +li +ln +lo +lt +lu +lv +mg +mh +mi +mk +ml +mn +mr +ms +mt +my +na +nb +nb +nd +nd +ne +ng +nl +nl +nn +nn +no +nr +nr +nv +nv +ny +ny +ny +oc +oj +om +or +os +os +pa +pa +pi +pl +ps +ps +pt +qu +rm +rn +ro +ro +ro +ru +rw +sa +sc +sd +se +sg +si +si +sk +sl +sm +sn +so +sq +sr +ss +st +su +sv +sw +ta +te +tg +th +ti +tk +tl +tn +to +tr +ts +tt +tw +ty +ug +ug +uk +ur +uz +ve +vi +vo +wa +wo +xh +yi +yo +za +za +zh +zu diff --git a/src/atextcrawler/assets/top_1e4 b/src/atextcrawler/assets/top_1e4 new file mode 100644 index 0000000..2c7dbf0 --- /dev/null +++ b/src/atextcrawler/assets/top_1e4 @@ -0,0 +1,10000 @@ +google.com +facebook.com +youtube.com +microsoft.com +twitter.com +tmall.com +instagram.com +windowsupdate.com +qq.com +linkedin.com +baidu.com +apple.com +wikipedia.org +netflix.com +live.com +sohu.com +doubleclick.net +amazon.com +yahoo.com +googletagmanager.com +taobao.com +youtu.be +adobe.com +pinterest.com +360.cn +vimeo.com +bing.com +reddit.com +jd.com +wordpress.com +office.com +weibo.com +zoom.us +googleusercontent.com +goo.gl +github.com +sina.com.cn +amazonaws.com +bit.ly +microsoftonline.com +xinhuanet.com +wordpress.org +google-analytics.com +blogspot.com +vk.com +fbcdn.net +tumblr.com +mozilla.org +msn.com +nytimes.com +whatsapp.com +flickr.com +europa.eu +gravatar.com +dropbox.com +cnn.com +ytimg.com +alipay.com 
+okezone.com +myshopify.com +soundcloud.com +nih.gov +panda.tv +medium.com +t.co +apache.org +skype.com +ebay.com +csdn.net +zhanqi.tv +w3.org +twitch.tv +yahoo.co.jp +spotify.com +forbes.com +theguardian.com +paypal.com +googlevideo.com +google.com.hk +office365.com +imdb.com +sourceforge.net +bbc.co.uk +aliexpress.com +googleadservices.com +macromedia.com +googlesyndication.com +archive.org +bongacams.com +naver.com +bbc.com +github.io +digicert.com +cloudflare.com +stackoverflow.com +weebly.com +yandex.ru +gvt2.com +creativecommons.org +amazon.co.jp +wixsite.com +amazon.in +issuu.com +washingtonpost.com +who.int +virginmedia.com +windows.net +tianya.cn +imgur.com +icloud.com +huanqiu.com +app-measurement.com +ggpht.com +aparat.com +etsy.com +yy.com +oracle.com +cdc.gov +akadns.net +reuters.com +google.co.in +mail.ru +chaturbate.com +163.com +php.net +google.de +wix.com +tinyurl.com +slideshare.net +tribunnews.com +so.com +godaddy.com +amazon.co.uk +akamaiedge.net +wikimedia.org +wsj.com +windows.com +businessinsider.com +bloomberg.com +forms.gle +youtube-nocookie.com +17ok.com +sciencedirect.com +opera.com +alibaba.com +cnet.com +ok.ru +outlook.com +wp.com +harvard.edu +google.co.jp +mit.edu +gnu.org +dailymail.co.uk +1688.com +opendns.com +google.com.br +researchgate.net +pornhub.com +ibm.com +go.com +usatoday.com +list-manage.com +ntp.org +blogger.com +fandom.com +t.me +stanford.edu +cnbc.com +wiley.com +rakuten.co.jp +hicloud.com +livejasmin.com +indeed.com +samsung.com +espn.com +office.net +aol.com +bitly.com +google.co.uk +hp.com +telegraph.co.uk +facebook.net +amazon.de +canva.com +cnblogs.com +aaplimg.com +fb.com +sogou.com +hao123.com +walmart.com +surveymonkey.com +nature.com +tiktok.com +booking.com +npr.org +nasa.gov +cpanel.net +identrust.com +foxnews.com +nginx.org +tradingview.com +eventbrite.com +dailymotion.com +un.org +cloudfront.net +time.com +haosou.com +indiatimes.com +msedge.net +babytree.com +xvideos.com +force.com +myspace.com +ca.gov +salesforce.com +slack.com +bilibili.com +aliyun.com +huffingtonpost.com +behance.net +addthis.com +www.gov.uk +udemy.com +springer.com +apple-dns.net +ted.com +google.fr +google.es +roblox.com +kompas.com +doi.org +cpanel.com +sharepoint.com +google.cn +wired.com +yelp.com +detik.com +chase.com +zillow.com +nginx.com +youku.com +mysql.com +google.it +zhihu.com +freepik.com +google.ru +hugedomains.com +scorecardresearch.com +instructure.com +independent.co.uk +akamaihd.net +flipkart.com +stackexchange.com +thestartmagazine.com +scribd.com +goodreads.com +themeforest.net +amazon-adsystem.com +akamaized.net +healthline.com +techcrunch.com +zendesk.com +gome.com.cn +mailchimp.com +debian.org +wa.me +squarespace.com +daum.net +latimes.com +gmail.com +free.fr +intel.com +telegram.org +unsplash.com +6.cn +shutterstock.com +berkeley.edu +adnxs.com +line.me +tripadvisor.com +grammarly.com +wetransfer.com +googletagservices.com +statcounter.com +livejournal.com +addtoany.com +wikihow.com +android.com +taboola.com +akamai.net +msftncsi.com +xhamster.com +gvt1.com +duckduckgo.com +giphy.com +amzn.to +savefrom.net +theverge.com +amazon.ca +netflix.net +ikea.com +google.ca +pixnet.net +ettoday.net +webmd.com +mediafire.com +intuit.com +speedtest.net +twimg.com +pixabay.com +sitemaps.org +kickstarter.com +ft.com +craigslist.org +cbsnews.com +irs.gov +beian.gov.cn +quora.com +whatsapp.net +google.com.sg +nbcnews.com +theatlantic.com +nationalgeographic.com +okta.com +loc.gov +usps.com +investopedia.com +cornell.edu +w3schools.com +ietf.org 
+marketwatch.com +arnebrachhold.de +cisco.com +washington.edu +digg.com +criteo.com +rednet.cn +shopify.com +zoho.com +deviantart.com +miit.gov.cn +bestbuy.com +adsrvr.org +rubiconproject.com +appsflyer.com +jimdo.com +webex.com +weather.com +digikala.com +padlet.com +wellsfargo.com +primevideo.com +about.com +google.com.tw +tiktokv.com +tandfonline.com +dell.com +nflxso.net +akismet.com +trello.com +onlinesbi.com +huffpost.com +pubmatic.com +buzzfeed.com +crashlytics.com +usnews.com +marriott.com +launchpad.net +fda.gov +azureedge.net +prnewswire.com +cambridge.org +discord.com +tistory.com +ups.com +disqus.com +princeton.edu +pikiran-rakyat.com +typepad.com +cnzz.com +uol.com.br +globo.com +stumbleupon.com +mailchi.mp +pinimg.com +ampproject.org +ilovepdf.com +avito.ru +2mdn.net +economist.com +hulu.com +demdex.net +mashable.com +statista.com +tiktokcdn.com +nypost.com +tokopedia.com +sciencemag.org +alicdn.com +academia.edu +msftconnecttest.com +bandcamp.com +coursera.org +whitehouse.gov +hubspot.com +change.org +youronlinechoices.com +nflximg.com +pbs.org +box.com +constantcontact.com +huawei.com +pki.goog +outbrain.com +rt.com +oup.com +target.com +51.la +patreon.com +soso.com +liputan6.com +feedburner.com +symantec.com +airbnb.com +youm7.com +ebay.de +advertising.com +fedex.com +google.com.vn +google.com.mx +trustpilot.com +aboutads.info +webs.com +rlcdn.com +steampowered.com +evernote.com +investing.com +casalemedia.com +homedepot.com +chess.com +openx.net +jquery.com +unesco.org +fc2.com +mozilla.com +sagepub.com +state.gov +disneyplus.com +amazon.fr +usda.gov +plesk.com +columbia.edu +grid.id +google.com.tr +varzesh3.com +eepurl.com +vice.com +arcgis.com +avast.com +umich.edu +hbr.org +moatads.com +teamviewer.com +britannica.com +mayoclinic.org +allaboutcookies.org +accuweather.com +cbc.ca +engadget.com +redhat.com +abc.net.au +hdfcbank.com +lazada.sg +gofundme.com +smallpdf.com +pexels.com +fiverr.com +hola.org +networkadvertising.org +business.site +dribbble.com +noaa.gov +51sole.com +psu.edu +tripod.com +vox.com +epa.gov +zdnet.com +geocities.com +bidswitch.net +setn.com +coinmarketcap.com +meetup.com +ebay.co.uk +google.co.th +yale.edu +bet9ja.com +amazon.es +sciencedaily.com +sun.com +worldometers.info +kumparan.com +metropoles.com +nist.gov +hootsuite.com +guardian.co.uk +heavy.com +getpocket.com +americanexpress.com +elsevier.com +example.com +newyorker.com +aliexpress.ru +wayfair.com +businesswire.com +dw.com +iqiyi.com +google.pl +upenn.edu +iso.org +breitbart.com +worldbank.org +sindonews.com +fidelity.com +gizmodo.com +apnews.com +nvidia.com +gotowebinar.com +cdninstagram.com +momoshop.com.tw +amazon.it +redd.it +fastcompany.com +typeform.com +psychologytoday.com +photobucket.com +wpengine.com +vkontakte.ru +merriam-webster.com +nike.com +ieee.org +telewebion.com +steamcommunity.com +byteoversea.com +edgekey.net +everesttech.net +appcenter.ms +snapchat.com +jpnn.com +att.com +ox.ac.uk +bluekai.com +umn.edu +entrepreneur.com +xnxx.com +patch.com +oreilly.com +inc.com +politico.com +theepochtimes.com +fortune.com +elpais.com +suara.com +binance.com +linktr.ee +plos.org +gamepedia.com +google.com.ar +tudou.com +google.com.au +hotjar.com +uci.edu +cdstm.cn +ed.gov +canada.ca +mathtag.com +google.co.id +wisc.edu +aliyuncs.com +jhu.edu +deloitte.com +afternic.com +theconversation.com +wiktionary.org +quantserve.com +capitalone.com +upwork.com +epicgames.com +newsweek.com +python.org +spiegel.de +deepl.com +sfgate.com +scientificamerican.com +newrelic.com +chicagotribune.com 
+bankofamerica.com +google.com.sa +agkn.com +ask.com +ucla.edu +bbb.org +mercadolivre.com.br +elegantthemes.com +mercadolibre.com.mx +namnak.com +wikia.com +douban.com +sfx.ms +slate.com +ny.gov +g.page +quizlet.com +yimg.com +chinadaily.com.cn +bmj.com +adp.com +glassdoor.com +fb.me +feedly.com +manoramaonline.com +360.com +ndtv.com +www.gov.cn +ubuntu.com +xfinity.com +appspot.com +weforum.org +live.net +arxiv.org +medicalnewstoday.com +timeanddate.com +bukalapak.com +zol.com.cn +qualtrics.com +ameblo.jp +cmu.edu +utexas.edu +chouftv.ma +icicibank.com +realtor.com +indiegogo.com +criteo.net +tencent.com +zerodha.com +cam.ac.uk +crwdcntrl.net +lenovo.com +pcmag.com +chron.com +oecd.org +biomedcentral.com +cbslocal.com +playstation.com +discordapp.com +thesun.co.uk +uk.com +aboutcookies.org +khanacademy.org +mirror.co.uk +nr-data.net +softonic.com +telegram.me +y2mate.com +arstechnica.com +bootstrapcdn.com +bet365.com +google.com.eg +apa.org +nps.gov +uchicago.edu +qz.com +ign.com +e2ro.com +bizjournals.com +orange.fr +uiuc.edu +hilton.com +cnnic.cn +unity3d.com +adform.net +dns.google +howstuffworks.com +tapad.com +zaloapp.com +dropcatch.com +merdeka.com +ftc.gov +iqbroker.com +mercadolibre.com.ar +fastly.net +asus.com +fontawesome.com +verisign.com +verizon.com +istockphoto.com +us.com +crunchyroll.com +messenger.com +bitnami.com +nba.com +purdue.edu +nicovideo.jp +adsafeprotected.com +fao.org +comodoca.com +parallels.com +si.edu +news.com.au +house.gov +xing.com +jotform.com +over-blog.com +techradar.com +mckinsey.com +visualstudio.com +krxd.net +openstreetmap.org +etoro.com +ea.com +nydailynews.com +smh.com.au +namu.wiki +reverso.net +nikkei.com +moneycontrol.com +spotxchange.com +autodesk.com +netscape.com +asos.com +mgid.com +kakao.com +barnesandnoble.com +usc.edu +earthlink.net +nyu.edu +thehill.com +shaparak.ir +gosuslugi.ru +instructables.com +umeng.com +livescience.com +3lift.com +pnas.org +toutiao.com +abs-cbn.com +pewresearch.org +jstor.org +altervista.org +trafficmanager.net +googleblog.com +blogspot.co.uk +op.gg +idntimes.com +turn.com +inquirer.net +hotstar.com +thesaurus.com +ning.com +senate.gov +lifehacker.com +eastday.com +norton.com +enable-javascript.com +joomla.org +aljazeera.com +azure.com +thepiratebay.org +variety.com +rambler.ru +allegro.pl +thedailybeast.com +geeksforgeeks.org +sectigo.com +jrj.com.cn +uber.com +census.gov +dictionary.com +discord.gg +azurewebsites.net +sberbank.ru +proiezionidiborsa.it +jsdelivr.net +xiaomi.com +viva.co.id +stripe.com +secureserver.net +sakura.ne.jp +wildberries.ru +lijit.com +duke.edu +vmware.com +thelancet.com +exelator.com +ow.ly +rollingstone.com +mookie1.com +prezi.com +venturebeat.com +ladbible.com +trendyol.com +google.co.kr +icio.us +usgs.gov +acs.org +oppomobile.com +amap.com +gmw.cn +fbsbx.com +ufl.edu +fool.com +sky.com +branch.io +gstatic.com +postgresql.org +atlassian.com +alwafd.news +costco.com +gartner.com +thefreedictionary.com +sonhoo.com +express.co.uk +northwestern.edu +zend.com +shopee.co.id +getbootstrap.com +msu.edu +foursquare.com +genius.com +google.com.ua +withgoogle.com +proofpoint.com +imageshack.us +umd.edu +angelfire.com +web.de +brilio.net +ucsd.edu +urbandictionary.com +9gag.com +nhk.or.jp +about.me +jianshu.com +ameritrade.com +talktalk.co.uk +cctv.com +seekingalpha.com +letsencrypt.org +lemonde.fr +td.com +gitlab.com +themeisle.com +citi.com +patria.org.ve +jamanetwork.com +sahibinden.com +mzstatic.com +wish.com +newegg.com +scmp.com +hatena.ne.jp +namecheap.com +rottentomatoes.com +ria.ru 
+bls.gov +lowes.com +utoronto.ca +spankbang.com +docker.com +sgsnssdk.com +digitaltrends.com +tiktokcdn-in.com +hatenablog.com +efu.com.cn +naukri.com +people.com +dhl.com +thetimes.co.uk +hhs.gov +alexa.com +namasha.com +teads.tv +unc.edu +duolingo.com +slashdot.org +gismeteo.ru +googleapis.com +theglobeandmail.com +hm.com +adjust.com +gmx.net +heytapmobi.com +itu.int +smartadserver.com +doubleverify.com +ssl-images-amazon.com +privacyshield.gov +dotomi.com +nejm.org +mama.cn +apachefriends.org +mixcloud.com +nfl.com +cricbuzz.com +squareup.com +kaspersky.com +mixpanel.com +360doc.com +shopee.vn +schwab.com +sec.gov +shopee.tw +boston.com +rctiplus.com +bhphotovideo.com +today.com +history.com +google.gr +moodle.org +ouedkniss.com +mitre.org +media.net +1rx.io +ebay.com.au +google.nl +hollywoodreporter.com +w.org +gamespot.com +zalo.me +mi.com +newscientist.com +myqcloud.com +donya-e-eqtesad.com +wufoo.com +dedecms.com +openssl.org +creativecdn.com +adobe.io +m.me +kapanlagi.com +woocommerce.com +xe.com +freebsd.org +siemens.com +xbox.com +zara.com +wunderground.com +focus.cn +qpic.cn +eff.org +marca.com +www.nhs.uk +omtrdc.net +edgesuite.net +illinois.edu +in.gr +java.com +nymag.com +calendly.com +bitbucket.org +indiamart.com +drupal.org +albawabhnews.com +iyiou.com +videocampaign.co +arizona.edu +indianexpress.com +google.ro +techtarget.com +onlyfans.com +dcard.tw +kernel.org +ebay-kleinanzeigen.de +unicef.org +dbs.com.sg +thegatewaypundit.com +moz.com +ucdavis.edu +asu.edu +macys.com +wattpad.com +metro.co.uk +colorado.edu +agafurretor.com +livestream.com +51yes.com +iana.org +amazon.com.mx +wp.me +automattic.com +espncricinfo.com +biblegateway.com +adsymptotic.com +repubblica.it +wa.gov +eastmoney.com +libsyn.com +dmoz.org +icann.org +gsmarena.com +linksynergy.com +borna.news +lefigaro.fr +coinbase.com +yts.mx +technologyreview.com +foodnetwork.com +rutgers.edu +go-mpulse.net +fast.com +licdn.com +mapquest.com +gotomeeting.com +ensonhaber.com +ustream.tv +oschina.net +spb.ru +thenextweb.com +redfin.com +sony.com +ozon.ru +uspto.gov +zippyshare.com +incometaxindiaefiling.gov.in +gallup.com +qoo10.sg +businessinsider.de +rediff.com +alodokter.com +shopee.com.my +banvenez.com +ibicn.com +w55c.net +t-online.de +ih5.cn +onenote.net +archives.gov +mega.nz +cbssports.com +sba.gov +livedoor.jp +sputniknews.com +discogs.com +pinterest.ca +justice.gov +ubc.ca +tutorialspoint.com +audible.com +phys.org +kakaku.com +wordreference.com +opensource.org +gusuwang.com +howtogeek.com +ssrn.com +dot.gov +vnexpress.net +eset.com +sap.com +matterport.com +rezka.ag +liansuo.com +hbomax.com +mystrikingly.com +elbalad.news +teachable.com +shutterfly.com +custhelp.com +runoob.com +frontier.com +miui.com +simpli.fi +1337x.to +xboxlive.com +ninisite.com +bluehost.com +rfihub.com +mdpi.com +filimo.com +contextweb.com +rs6.net +myworkdayjobs.com +dbankcloud.com +zadn.vn +mlb.com +virginia.edu +samsungapps.com +prweb.com +wowhead.com +last.fm +hindustantimes.com +businessweek.com +usa.gov +sina.cn +chinanews.com +adweek.com +taleo.net +edx.org +globalsign.com +salon.com +tremorhub.com +fbi.gov +beytoote.com +envato.com +hotmart.com +pwc.com +yumpu.com +uptodown.com +sitescout.com +osu.edu +garmin.com +divar.ir +podbean.com +google.com.my +euronews.com +nsw.gov.au +sq.cn +as.com +samsungcloud.com +redbubble.com +list-manage1.com +admin.ch +goo.ne.jp +bidr.io +acm.org +discovery.com +blackboard.com +wp.pl +zemanta.com +adobedtm.com +va.gov +rakuten.com +imrworldwide.com +ivi.ru +unrulymedia.com +bu.edu +meituan.com 
+colorlib.com +lonelyplanet.com +slickdeals.net +viber.com +paypal.me +kde.org +rbc.ru +themegrill.com +secomtrust.net +hurriyet.com.tr +ap.org +frontiersin.org +is.gd +google.co.ve +readthedocs.io +ethz.ch +qlogo.cn +westernjournal.com +ecosia.org +thoughtco.com +boc.cn +turkiye.gov.tr +biobiochile.cl +fpt.vn +nokia.com +accenture.com +fcc.gov +pastebin.com +pandora.com +globenewswire.com +libero.it +bostonglobe.com +wustl.edu +jiathis.com +smithsonianmag.com +t-mobile.com +onesignal.com +lego.com +tamu.edu +flic.kr +shopeemobile.com +infobae.com +dhs.gov +consumerreports.org +onet.pl +kontan.co.id +dropboxusercontent.com +perl.org +phpbb.com +bigcartel.com +mcafee.com +shein.com +anchor.fm +notion.so +lencr.org +amd.com +faqs.org +weather.gov +google.be +pcworld.com +elwatannews.com +nordstrom.com +atdmt.com +bild.de +cnnindonesia.com +tmz.com +avg.com +qcloud.com +usembassy.gov +zing.vn +iheart.com +mercari.com +ey.com +vanityfair.com +indexww.com +atlassian.net +hostgator.com +msnbc.com +fedoraproject.org +dmm.co.jp +yolasite.com +3gppnetwork.org +liadm.com +treasury.gov +netdna-ssl.com +dianping.com +hexun.com +smugmug.com +voanews.com +strava.com +imf.org +udn.com +emxdgt.com +theregister.co.uk +seznam.cz +narod.ru +com.com +rfc-editor.org +thawte.com +foxbusiness.com +google.ch +discover.com +lg.com +dafont.com +it168.com +ucl.ac.uk +4shared.com +irctc.co.in +500px.com +garena.com +oxfordjournals.org +justdial.com +att.net +medscape.com +myworkday.com +elsevierhealth.com +premierleague.com +globalsign.net +indiana.edu +58.com +firefox.com +coupang.com +cnbcindonesia.com +google.com.ph +lenta.ru +gutenberg.org +computerworld.com +billboard.com +snopes.com +greenpeace.org +army.mil +elmundo.es +asahi.com +mehrnews.com +expedia.com +amazon.cn +tufts.edu +ibb.co +amazon.com.au +optimizely.com +sharethrough.com +thehindu.com +utah.edu +brookings.edu +youporn.com +schneier.com +delicious.com +cancer.org +google.co.za +dol.gov +media-amazon.com +usertrust.com +icloud-content.com +ncsu.edu +dreamstime.com +thestar.com +myanimelist.net +banggood.com +doordash.com +google.at +liveinternet.ru +broadcom.com +congress.gov +bund.de +houzz.com +seattletimes.com +novell.com +hyatt.com +pixiv.net +cpan.org +udel.edu +heart.org +japanpost.jp +eatthis.com +shareasale.com +ultimate-guitar.com +mercurynews.com +neilpatel.com +cnbeta.com +lww.com +gettyimages.com +mozilla.net +filesusr.com +prestashop.com +allrecipes.com +rutracker.org +msecnd.net +xiaomi.net +mathrubhumi.com +crunchbase.com +heycould.com +nasdaq.com +pitt.edu +texas.gov +schoology.com +bugsnag.com +herokuapp.com +tsetmc.com +tejaratnews.com +cia.gov +nielsen.com +app.link +energy.gov +gumgum.com +ico.org.uk +navy.mil +freshdesk.com +trustarc.com +staticflickr.com +truste.com +nintendo.com +leboncoin.fr +iherb.com +eba.gov.tr +youdao.com +undp.org +globalnews.ca +welt.de +standard.co.uk +tinypic.com +nyc.gov +google.com.pk +semanticscholar.org +digitalocean.com +eurekalert.org +pchome.com.tw +renren.com +brightcove.net +disney.com +impress.co.jp +me.com +ads-twitter.com +ctvnews.ca +naver.jp +ihg.com +goodhousekeeping.com +yahoodns.net +g.co +collegeboard.org +wikidot.com +ssa.gov +thebalance.com +haxx.se +chegg.com +aarp.org +ucweb.com +cygwin.com +google.cl +cuny.edu +chinaz.com +yandex.com +adblockplus.org +washingtontimes.com +reference.com +typekit.net +mileroticos.com +clarin.com +ntp.org.cn +remove.bg +fifa.com +ytmp3.cc +panasonic.com +domaintools.com +blogspot.ca +theregister.com +vidio.com +irishtimes.com +ucsb.edu +ecer.com 
+ed.ac.uk +uidai.gov.in +stickyadstv.com +depositphotos.com +battle.net +nhl.com +pippio.com +answers.com +citrix.com +cnki.net +pearson.com +huaban.com +tiny.cc +cdn-apple.com +ibtimes.com +symcd.com +bleacherreport.com +imageshack.com +hubpages.com +wondershare.com +freelancer.com +philips.com +accor.com +google.az +buzzfeednews.com +deezer.com +w3school.com.cn +homestead.com +aweber.com +jimdofree.com +mopub.com +house365.com +thingiverse.com +biglobe.ne.jp +france24.com +vrbo.com +ehow.com +farfetch.com +weibo.cn +athemes.com +medlineplus.gov +tesla.com +payu.in +flipboard.com +prothomalo.com +blog.google +gatech.edu +ancestry.com +openldap.org +semrush.com +coindesk.com +ew.com +104.com.tw +junwonsil.com +pcbaby.com.cn +utorrent.com +nbcsports.com +usbank.com +service.gov.uk +nownews.com +videolan.org +youth.cn +minecraft.net +ebc.net.tw +chsi.com.cn +heise.de +insider.com +nobelprize.org +deadline.com +hotmail.com +000webhostapp.com +securityfocus.com +codecademy.com +msdn.com +flaticon.com +cbs.com +japantimes.co.jp +blogspot.de +coe.int +lifewire.com +amazontrust.com +cancer.gov +shopee.co.th +hawaii.edu +chinanetrank.com +binomo.com +digital.com +vt.edu +oregonstate.edu +cell.com +softpedia.com +royalbank.com +nerdwallet.com +livedoor.com +georgetown.edu +realsrv.com +boredpanda.com +flurry.com +mtv.com +youronlinechoices.eu +redcross.org +interia.pl +thomsonreuters.com +olympic.org +cosmopolitan.com +gimp.org +redbull.com +1drv.ms +shopee.ph +ccc.de +bttrack.com +docusign.net +kuronekoyamato.co.jp +spreaker.com +denverpost.com +lastpass.com +coursehero.com +timeout.com +superuser.com +yoox.com +dspultra.com +coloros.com +ovh.net +startribune.com +india.com +uscis.gov +mac.com +pdfdrive.com +zeit.de +groupon.com +babyschool.com.cn +caltech.edu +comcast.net +scoop.it +skroutz.gr +ilo.org +calameo.com +webnode.com +zdn.vn +zhibo8.cc +amplitude.com +krupdapp.com +gumroad.com +nextdoor.com +cntv.cn +google.com.pe +google.dz +brown.edu +git-scm.com +nj.com +msi.com +nbc.com +cameraddns.net +buydomains.com +gvt3.com +jamnews.com +axisbank.co.in +vogue.com +transferwise.com +logitech.com +tv9marathi.com +blizzard.com +iop.org +mfadsrvr.com +informer.com +about.google +xkcd.com +onlinesbi.sbi +dpreview.com +fema.gov +stuff.co.nz +eventbrite.co.uk +miamiherald.com +ouo.io +pixlr.com +corriere.it +vatican.va +eqxiu.com +examiner.com +cointelegraph.com +hotels.com +ribunews.com +britishcouncil.org +line-apps.com +elle.com +socdm.com +braze.com +mcgill.ca +delta.com +artstation.com +colostate.edu +bankrate.com +reverbnation.com +trulia.com +overstock.com +outbrainimg.com +wipo.int +home.blog +mydrivers.com +space.com +sophos.com +kundelik.kz +esquire.com +goal.com +findlaw.com +serving-sys.com +state.tx.us +uga.edu +pngtree.com +tinder.com +csmonitor.com +walgreens.com +uniqlo.com +searchengineland.com +perl.com +ford.com +tutsplus.com +hrw.org +riotgames.com +openoffice.org +service-now.com +tomshardware.com +bigo.sg +trendmicro.com +livechatinc.com +codecanyon.net +adobelogin.com +wixstatic.com +applovin.com +360yield.com +southcn.com +eonline.com +gpo.gov +pbase.com +joinhoney.com +warnerbros.com +jetbrains.com +morningpost.com.cn +uc.cn +technorati.com +techrepublic.com +blackberry.com +alibabausercontent.com +raspberrypi.org +zazzle.com +isc.org +rumble.com +olx.pl +ebay.fr +uiowa.edu +wired.co.uk +sarkariresult.com +milliyet.com.tr +stitcher.com +fivethirtyeight.com +xero.com +xiami.com +filmix.co +olx.ua +cvent.com +mos.ru +smartsheet.com +dailycaller.com +wikibooks.org +ibytedtos.com 
+osha.gov +vanguard.com +newsmax.com +unl.edu +000webhost.com +sfu.ca +popularmechanics.com +kp.ru +jpost.com +123rf.com +smashingmagazine.com +nzherald.co.nz +4chan.org +258.com +coingecko.com +metmuseum.org +hpe.com +bustle.com +abc.es +alnaharegypt.com +robinhood.com +people.com.cn +treehugger.com +agoda.com +gap.com +gtmetrix.com +uchi.ru +google.se +popsugar.com +telegra.ph +dawn.com +azcentral.com +eyeota.net +dallasnews.com +douyu.com +appier.net +bell-labs.com +jw.org +wikisource.org +flashtalking.com +monster.com +tdameritrade.com +quantcount.com +esa.int +google.pt +aa.com +wplay.co +uscourts.gov +pushimg.com +tableau.com +lavanguardia.com +gmu.edu +dmm.com +sourceware.org +loom.com +faz.net +ycombinator.com +creditchina.gov.cn +boingboing.net +knowyourmeme.com +dangdang.com +google.com.co +ubisoft.com +mynavi.jp +foreignpolicy.com +amazon.com.br +trontv.com +chip.de +rapid7.com +docin.com +elfagr.com +refinery29.com +popsci.com +arduino.cc +zomato.com +gwu.edu +gmanetwork.com +zdf.de +2345.com +channel4.com +sapo.pt +laodong.vn +monday.com +codepen.io +storify.com +straitstimes.com +fitbit.com +sharethis.com +polygon.com +eee114.com +sonobi.com +vvvdj.com +eghtesadonline.com +clickfunnels.com +us.org +boutell.co.uk +wistia.com +buffer.com +lua.org +cloudinary.com +curl.se +hbo.com +bola.com +chinnica.net +iastate.edu +ucoz.ru +emofid.com +umass.edu +blogspot.com.es +boutell.com +ahrefs.com +secureworks.com +centos.org +indiatoday.in +gyazo.com +bancodevenezuela.com +mydramalist.com +ohchr.org +axios.com +huffingtonpost.co.uk +freewebs.com +animeflv.net +yp.to +oregonlive.com +huim.com +yahoo.net +avcdn.net +zappos.com +aajtak.in +vanderbilt.edu +diigo.com +kooora.com +creditkarma.com +af.mil +archdaily.com +asana.com +blogspot.fr +mpg.de +kohls.com +axs.com +mentalfloss.com +atlasobscura.com +xabbs.com +tribalfusion.com +nodejs.org +squid-cache.org +thebalancecareers.com +iweihai.cn +campaign-archive.com +tabelog.com +kijiji.ca +a8.net +gtimg.cn +parler.com +sueddeutsche.de +malwarebytes.com +starbucks.com +alimama.com +experian.com +bitchute.com +searchenginejournal.com +motor1.com +chaduo.com +screenrant.com +tidaltv.com +focus.de +amnesty.org +strikingly.com +smartcloudcon.com +fsu.edu +wampserver.com +etrade.com +livemint.com +aka.ms +cookpad.com +gogoanime.so +snssdk.com +hc360.com +windowsphone.com +pinterest.co.uk +onetrust.com +gamer.com.tw +id5-sync.com +si.com +tagesschau.de +cloudflare.net +17track.net +consultant.ru +reclameaqui.com.br +baltimoresun.com +cookielaw.org +ksmobile.com +unblog.fr +cairo24.com +livestrong.com +kommersant.ru +hespress.com +clinicaltrials.gov +footprintdns.com +mozgcp.net +powerbi.com +theage.com.au +segment.io +nifty.com +lifo.gr +upi.com +9384.com +aljazeera.net +torob.com +wsu.edu +gfycat.com +iliangcang.com +mhlw.go.jp +home77.com +usf.edu +fbpigeon.com +iso.ch +apkpure.com +sbnation.com +tynt.com +adgrx.com +news18.com +ucsf.edu +caixa.gov.br +elcomercio.com +weblio.jp +epfindia.gov.in +byu.edu +eater.com +smaato.net +clickbank.net +hardened-php.net +consensu.org +bloglovin.com +brightcove.com +chosun.com +nairaland.com +scholastic.com +ebay.it +louisvuitton.com +redtube.com +uu.nl +huffingtonpost.ca +sjtu.edu.cn +mimecast.com +ovh.com +lycos.com +gamestop.com +viglink.com +phicdn.net +lse.ac.uk +fujitsu.com +rice.edu +nyaa.si +nationalpost.com +sxyprn.com +iu.edu +leagueoflegends.com +tass.ru +33across.com +observer.com +anydesk.com +isnssdk.com +made-in-china.com +ajc.com +adage.com +zerohedge.com +mass.gov +bt.com +adroll.com 
+srvtrck.com +codeplex.com +wbx2.com +rochester.edu +purl.org +rahavard365.com +uwaterloo.ca +online-convert.com +xda-developers.com +hindawi.com +vivoglobal.com +dartmouth.edu +addthisedge.com +google.ae +aofex.com +qiita.com +rsc.org +pku.edu.cn +unam.mx +vine.co +lulu.com +xmnn.cn +cvs.com +campaign-archive1.com +justia.com +cima4u.io +cwi.nl +nap.edu +unsw.edu.au +namebright.com +example.org +blogs.com +stackadapt.com +worldcat.org +motherjones.com +miro.com +11st.co.kr +jiameng.com +91jm.com +michigan.gov +infusionsoft.com +timesonline.co.uk +cri.cn +legacy.com +mheducation.com +eghtesadnews.com +doxygen.nl +globalsources.com +republika.co.id +eu.com +yieldmo.com +buffalo.edu +hamariweb.com +ama-assn.org +ccleaner.com +mmload.com +siteground.com +thepaper.cn +gq.com +convertio.co +typekit.com +caniuse.com +freecodecamp.org +lifehack.org +hbs.edu +thestreet.com +freep.com +hgtv.com +virginia.gov +man7.org +tunein.com +rakuten-sec.co.jp +inews.id +crazyegg.com +siemens.de +clickpost.jp +campaign-archive2.com +timesofisrael.com +orcid.org +auth0.com +wikispaces.com +itmedia.co.jp +doxygen.org +drudgereport.com +sfr.fr +libpng.org +zlib.net +emalls.ir +mk.ru +chartbeat.net +icims.com +motorplus-online.com +liveleak.com +kotaku.com +npmjs.com +owneriq.net +zeotap.com +yourdictionary.com +united.com +anu.edu.au +dyndns.org +ticketmaster.com +stocktwits.com +livescore.com +sporx.com +macrumors.com +emarketer.com +nicsorts-accarade.com +afip.gob.ar +weathercn.com +list-manage2.com +suntimes.com +itv.com +dbankcloud.eu +bufferapp.com +championat.com +sketchfab.com +ebscohost.com +substack.com +google.cz +recaptcha.net +vulture.com +thinkwithgoogle.com +ualberta.ca +gamersky.com +spec.org +samsungosp.com +insertlive.com +jb51.net +urdupoint.com +makeuseof.com +gzip.org +business-standard.com +myntra.com +uoregon.edu +bedbathandbeyond.com +dan.com +parsfootball.com +note.com +rackcdn.com +ameba.jp +heytapdl.com +onlamp.com +lanacion.com.ar +chartbeat.com +sportbible.com +pinduoduo.com +thespruce.com +cootek.com +motorsport.com +rarbg.to +jooble.org +anjuke.com +edublogs.org +google.hu +doodle.com +cern.ch +cafe24.com +campograndenews.com.br +box.net +nd.edu +xianjichina.com +telephony.goog +timeweb.ru +tempo.co +ionos.com +cofile.net +digialm.com +kahoot.it +segment.com +sketchup.com +iubenda.com +medicinenet.com +memcached.org +angel.co +lnkd.in +visa.com +moma.org +justgiving.com +ctrip.com +clevelandclinic.org +basecamp.com +cyol.com +ml314.com +pnc.com +studentaid.gov +pcre.org +flipsnack.com +gazeta.ru +streamable.com +faa.gov +mediawiki.org +biography.com +wikiquote.org +tomsguide.com +rtve.es +eclipse.org +jwplayer.com +nocookie.net +dcinside.com +unity.com +scdn.co +oeeee.com +dropboxapi.com +skysports.com +webdav.org +kitco.com +defense.gov +tawk.to +lazada.co.th +fastcgi.com +retailmenot.com +xrea.com +apple.co +unpkg.com +drom.ru +google.co.il +federalreserve.gov +in.gov +fliphtml5.com +oculus.com +nalog.ru +freetype.org +rkdms.com +dmca.com +apachelounge.com +flightradar24.com +blockchain.com +hh.ru +xhamsterlive.com +cleartax.in +adidas.com +tahiamasr.com +ec-lyon.fr +hubspot.net +bbcollab.com +aclu.org +realestate.com.au +netdna-cdn.com +agah.com +msauth.net +donga.com +heytapmobile.com +formstack.com +pa.gov +infogram.com +pcgamer.com +proquest.com +metacafe.com +study.com +t.cn +rockstargames.com +blogspot.in +root-servers.net +nexusmods.com +firefoxchina.cn +cdiscount.com +tripadvisor.co.uk +createjs.com +fazenda.gov.br +www.gov.br +itch.io +kompas.tv +axisbank.com 
+enlightenment.org +xmlsoft.org +cc.com +mastercard.com +otvfoco.com.br +yadi.sk +francetvinfo.fr +gamesradar.com +evidon.com +catchthemes.com +mmstat.com +souq.com +protonmail.com +washingtonexaminer.com +sleepycat.com +roku.com +post-gazette.com +cutt.ly +j.mp +rsasecurity.com +goethe.de +3m.com +gab.com +counterpane.com +get-express-vpn.online +rei.com +travelandleisure.com +modsecurity.org +channelnewsasia.com +sozcu.com.tr +staples.com +cio.com +propublica.org +vseigru.net +posterous.com +cars.com +nus.edu.sg +kino-teatr.ru +haaretz.com +foxsports.com +yeniakit.com.tr +generatepress.com +alexametrics.com +bolasport.com +southwest.com +ziprecruiter.com +chinatax.gov.cn +barrons.com +inquirer.com +chromium.org +oath.com +filezilla-project.org +soft98.ir +tsinghua.edu.cn +altavista.com +curbed.com +unimelb.edu.au +wireshark.org +pewinternet.org +rfi.fr +tradedoubler.com +nflxext.com +akstat.io +yandex.net +uky.edu +makemytrip.com +dezeen.com +visual.ly +emory.edu +onmarshtompor.com +znds.com +radissonhotels.com +hwg.org +theaustralian.com.au +hopkinsmedicine.org +quizizz.com +wassenaar.org +pscp.tv +iz.ru +googlepages.com +google.ie +brainyquote.com +twilio.com +serverwatch.com +theintercept.com +memurlar.net +mlive.com +gulfnews.com +yodobashi.com +neobux.com +exoclick.com +sephora.com +kriesi.at +leparisien.fr +ijg.org +syf.com +ems.com.cn +arte.tv +zcool.com.cn +rbcroyalbank.com +ipcc.ch +wistia.net +52pk.com +amazon.jobs +mufg.jp +nsf.gov +discovermagazine.com +ora.com +mofidonline.com +cleveland.com +myftpupload.com +imperial.ac.uk +rakuten-bank.co.jp +lnk.to +protothema.gr +t66y.com +twitpic.com +demon.co.uk +dreamhost.com +apache-ssl.org +matomo.org +ynet.com +acer.com +vungle.com +square.site +churchofjesuschrist.org +manchester.ac.uk +ipredictive.com +caf.fr +usaa.com +icq.com +techsmith.com +sfchronicle.com +discuz.net +pdflib.com +onaudience.com +3dmgame.com +nationalreview.com +zougla.gr +tiqcdn.com +plurk.com +1tv.ru +asriran.com +aappublications.org +newstrend.news +apple.news +scotiabank.com +nber.org +complex.com +uq.edu.au +51job.com +pinterest.de +ukr.net +ohio.gov +jpush.cn +eia.gov +metacritic.com +mozaws.net +freetds.org +uic.edu +storm.mg +teespring.com +sonyliv.com +news.cn +sciencealert.com +cdbaby.com +eluniverso.com +uzone.id +9to5mac.com +line-scdn.net +home.kpmg +hwcdn.net +philly.com +supersonicads.com +akamai.com +kizlarsoruyor.com +technoratimedia.com +weather.com.cn +uh.edu +pressreader.com +rg.ru +ubs.com +missouri.edu +lentainform.com +netgear.com +video-ad-skipper.com +tianqi.com +menshealth.com +specbench.org +pole-emploi.fr +commbank.com.au +theonion.com +kaskus.co.id +flashscore.com +aastocks.com +marthastewart.com +mathworks.com +ynet.co.il +shimo.im +skyrock.com +webthing.com +terra.com.br +theme-fusion.com +penguinrandomhouse.com +kayak.com +msftauth.net +helpshift.com +odoo.com +self.com +orf.at +thenation.com +ucsc.edu +yna.co.kr +acuityplatform.com +url.cn +careerbuilder.com +federalregister.gov +soup.io +mynet.com +perfectdomain.com +fas.org +n-tv.de +lothar.com +virtualbox.org +rd.com +epwk.com +mofcom.gov.cn +etherscan.io +virustotal.com +tampabay.com +sacbee.com +youradchoices.com +lazada.com.ph +cronolog.org +worldofwarcraft.com +ge.com +everydayhealth.com +inmobi.com +yektanet.com +myfitnesspal.com +newsday.com +ct.gov +districtm.io +warcraftlogs.com +verywellmind.com +rit.edu +iplanet.com +hud.gov +nseindia.com +sbs.com.au +snapkit.com +newsearning.com +towardsdatascience.com +idnes.cz +lync.com +cj.com +dbankcloud.cn +webflow.io 
+cnrs.fr +cargocollective.com +businessinsider.com.au +cafepress.com +seek.com.au +olx.com.br +bit.do +weheartit.com +golux.com +ce.cn +routledge.com +uservoice.com +mewe.com +adotmob.com +harpersbazaar.com +adentifi.com +grammarly.io +unilad.co.uk +windowsazure.com +gucci.com +c-span.org +onelogin.com +5ch.net +meb.gov.tr +microsoftstream.com +zhaket.com +psychcentral.com +ixigua.com +uefa.com +successfactors.com +abebooks.com +nur.kz +ebay.ca +pstatp.com +getresponse.com +ouest-france.fr +bdstatic.com +page.link +gingerall.com +bdimg.com +thestar.com.my +prnt.sc +health.com +phpmyadmin.net +kemdikbud.go.id +cloudsink.net +manyvids.com +yoast.com +radio.com +newatlas.com +topfo.com +eshkol.io +mcdonalds.com +apachetutor.org +skimresources.com +google.dk +blogspot.com.au +symcb.com +39.net +payoneer.com +legislation.gov.uk +banesconline.com +lun.com +sabq.org +unhcr.org +byjus.com +zhaopin.com +google.fi +tp-link.com +mercadolibre.com.ve +gh0089.com +studiopress.com +wholefoodsmarket.com +fril.jp +futbin.com +canon.com +opaque.net +socialblade.com +cms.gov +dagospia.com +utk.edu +utah.gov +pulzo.com +hometax.go.kr +sinaimg.cn +bankmellat.ir +kidshealth.org +special-offers.online +xs4all.nl +spectrum.net +bitdefender.com +thethao247.vn +figma.com +vivo.com.cn +onlinedown.net +kaiserpermanente.org +blog.com +blender.org +moatpixel.com +pitchfork.com +dalfak.com +ryanair.com +pardot.com +aspnetcdn.com +dnevnik.ru +iobit.com +ahajournals.org +onedrive.com +apachehaus.com +habr.com +olx.in +gmarket.co.kr +ibyteimg.com +7-zip.org +documentcloud.org +oraclecloud.com +mykajabi.com +vg.no +aizhan.com +poste.it +uconn.edu +yellowpages.com +yorku.ca +netperf.org +mercadolibre.com.co +dbankcdn.com +dyntrk.com +temple.edu +avaz.ba +netease.com +sedo.com +seasonvar.ru +viki.com +simplesite.com +zopim.com +opensooq.com +verizonmedia.com +hangseng.com +lynda.com +gsu.edu +haibunda.com +warwick.ac.uk +billdesk.com +endclothing.com +ptt.cc +springerlink.com +affordable-papers.net +ksl.com +gawker.com +forrester.com +postimg.org +firebaseio.com +kbb.com +syr.edu +adf.ly +nj.gov +ibtimes.co.uk +code.org +ucr.edu +rakuten.ne.jp +honeywell.com +abb.com +bookdepository.com +moe.gov.cn +omnitagjs.com +gla.ac.uk +louvre.fr +infowars.com +thrillist.com +advangelists.com +meraki.com +abril.com.br +skillshare.com +python.ca +thrtle.com +independent.ie +squidoo.com +edmunds.com +anrdoezrs.net +wegotthiscovered.com +sendgrid.net +babycenter.com +usp.br +pendo.io +u-tokyo.ac.jp +bluestacks.com +real.com +cloudwaysapps.com +sehatq.com +pearltrees.com +autohome.com.cn +ebrun.com +xjtu.edu.cn +sendspace.com +bravesites.com +geocities.jp +eklablog.com +grubhub.com +daraz.pk +kknews.cc +jst.go.jp +ngacn.cc +imagemagick.com +freejobalert.com +hepsiburada.com +excite.co.jp +videojs.com +dummies.com +kaspersky-labs.com +apachetoday.com +eksisozluk.com +autotrader.com +verywellhealth.com +icbc.com.cn +bola.net +apple-cloudkit.com +shopee.sg +eluniversal.com.mx +freee.co.jp +nelreports.net +royalsocietypublishing.org +drugs.com +americanas.com.br +infoworld.com +net-a-porter.com +thebrighttag.com +ecwid.com +ansa.it +ksmobile.net +wto.org +univie.ac.at +cylance.com +mcusercontent.com +slatic.net +poshmark.com +ubereats.com +ti.com +morningstar.com +cheezburger.com +mapbox.com +sandiegouniontribune.com +emojipedia.org +blog.jp +oregon.gov +cengage.com +ibiblio.org +semasio.net +verizonwireless.com +networksolutions.com +wi.gov +copyright.gov +helsinki.fi +recode.net +usmagazine.com +useinsider.com +rte.ie +stripchat.com 
+rapidshare.com +uw.edu +ib-ibi.com +tenable.com +dostor.org +rsafrwd.com +51auto.com +thebalancesmb.com +newrepublic.com +game-mode.net +gopro.com +forgeofempires.com +wolframalpha.com +edh.tw +kdslife.com +kinja.com +kochava.com +eu.org +rakuten-card.co.jp +msk.ru +miami.edu +crictracker.com +wikileaks.org +legit.ng +yenisafak.com +page.tl +govdelivery.com +business2community.com +ezinearticles.com +ovhcloud.com +digitaljournal.com +google.bg +one.com +impact-ad.jp +bisnis.com +creditonebank.com +gds.it +angieslist.com +bestlifeonline.com +aniview.com +indiewire.com +awin1.com +bodybuilding.com +appleinsider.com +ifeng.com +pingdom.com +core.ac.uk +ifttt.com +winzip.com +illinois.gov +mo.gov +kompasiana.com +subscene.com +victoriassecret.com +hops.id +threebit.net +24h.com.vn +kugou.com +moneyforward.com +leeds.ac.uk +jsonline.com +githubusercontent.com +wikiwand.com +blogspot.it +blogfa.com +247sports.com +informationweek.com +thekitchn.com +mediatek.com +sun-sentinel.com +wartaekonomi.co.id +bartleby.com +match.com +here.com +jkforum.net +mawdoo3.com +wbur.org +bigcommerce.com +bitcoin.org +gigaom.com +scotsman.com +gog.com +joins.com +nngroup.com +origin.com +parivahan.gov.in +unixtools.org +nme.com +aaa.com +humblebundle.com +sbisec.co.jp +mosalasonline.com +fraunhofer.de +chooseauto.com.cn +lordfilms-s.pw +mobile.de +geotrust.com +politico.eu +sitepoint.com +ucalgary.ca +csod.com +gnome.org +autoblog.com +manganelo.com +phoca.cz +juntadeandalucia.es +mainichi.jp +dior.com +news24.com +tgju.org +truoptik.com +nflxvideo.net +1post4all.com +rappler.com +torproject.org +mybigcommerce.com +sabah.com.tr +smarturl.it +theknot.com +mobafire.com +aa.com.tr +ocregister.com +abc.com +whiterabbitpress.com +google.sk +unm.edu +ubi.com +zapier.com +opensuse.org +pp.ua +starwars.com +alibabadns.com +a-msedge.net +packagist.org +onelink.me +google.co.nz +blismedia.com +narcity.com +financialexpress.com +samsclub.com +exblog.jp +hi.ru +unfccc.int +ets.org +liberation.fr +deepintent.com +nrk.no +realclearpolitics.com +jpmorganchase.com +statnews.com +alarabiya.net +windy.com +coccoc.com +apartmenttherapy.com +beijing.gov.cn +al.com +ojooo.com +maryland.gov +toyota.com +ctnsnet.com +lotterypost.com +yle.fi +mercola.com +52pojie.cn +waze.com +pconline.com.cn +selfridges.com +sbrf.ru +thinkific.com +dnb.com +lieyunwang.com +legifrance.gouv.fr +avaaz.org +queensu.ca +networkworld.com +timeshighereducation.com +ourworldindata.org +colorado.gov +jwpcdn.com +eltiempo.com +thecut.com +udacity.com +aps.org +express.dhl +idc.com +orlandosentinel.com +thebase.in +premierbet.co.ao +fws.gov +v2ex.com +adskeeper.co.uk +thenational.ae +mercantilbanco.com +dailystar.co.uk +chronicle.com +glpals.com +36kr.com +samplicio.us +tate.org.uk +sc-cdn.net +plex.tv +wolfram.com +gearbest.com +insidehighered.com +ask.fm +ucf.edu +myway.com +detroitnews.com +samhsa.gov +creativemarket.com +lesechos.fr +porsche.com +medicalxpress.com +stockx.com +ss2.us +allmusic.com +blurb.com +doodlekit.com +ipv4only.arpa +12306.cn +speakerdeck.com +dellsupportcenter.com +glamour.com +e-recht24.de +dailykos.com +mxc.com +betrad.com +line.biz +imgix.net +bfmtv.com +ccb.com +beyla.site +company-target.com +northeastern.edu +atwola.com +wshareit.com +ku.edu +instapaper.com +presscustomizr.com +gazeta.pl +dhl.de +11467.com +yomiuri.co.jp +libreoffice.org +healthcare.gov +bleepingcomputer.com +entrust.net +startpage.com +wbidder.online +getintopc.com +tut.by +eurogamer.net +virgilio.it +bittorrent.com +guru99.com +elmostaqbal.com +caranddriver.com 
+126.com +nest.com +uvm.edu +peatix.com +curseforge.com +kpmg.com +pikabu.ru +mangadex.org +sprint.com +military.com +avira.com +homeadvisor.com +gridoto.com +dji.com +whitepages.com +instacart.com +amazon.ae +resumersvo.fun +uio.no +americanbar.org +datadoghq.com +fotor.com +meizu.com +tenor.com +reason.com +rand.org +shrm.org +pantip.com +theweek.com +guinnessworldrecords.com +jigsy.com +a2z.com +uptobox.com +prom.ua +liebertpub.com +zdassets.com +fsf.org +cbp.gov +wpastra.com +emirates.com +alz.org +usdoj.gov +accorhotels.com +mariadb.org +txxx.com +zingnews.vn +rtl-theme.com +canadapost.ca +swagbucks.com +6pm.com +womenshealthmag.com +dhgate.com +cryoutcreations.eu +searchenginewatch.com +hsbc.com.hk +diply.com +bfmio.com +mercadolibre.com +javatpoint.com +symantecliveupdate.com +credit-agricole.fr +adingo.jp +urbanoutfitters.com +site123.me +westernunion.com +wordstream.com +fourseasons.com +nationalarchives.gov.uk +foodmate.net +huya.com +google.lk +rackspace.com +tagesspiegel.de +fwmrm.net +20minutos.es +odnoklassniki.ru +cmoney.tw +cuevana3.io +hawaaworld.com +templatemonster.com +lexisnexis.com +handelsblatt.com +3isk.video +designboom.com +popads.net +fu-berlin.de +uva.nl +canalblog.com +bc.edu +ifixit.com +afr.com +bcg.com +s-microsoft.com +scielo.br +appledaily.com +naturalnews.com +themezee.com +encyclopedia.com +coconala.com +daftsex.com +abema.tv +heytapdownload.com +unep.org +signal.org +haberturk.com +ad-m.asia +blogtalkradio.com +freepik.es +znanija.com +hku.hk +epfl.ch +lmgtfy.com +adcolony.com +realsimple.com +google.hr +britishairways.com +html5up.net +cfsbcn.com +garenanow.com +elespanol.com +honda.com +supremecourt.gov +familydoctor.com.cn +hackernoon.com +google.no +inmotionhosting.com +yinyuetai.com +fanfiction.net +correios.com.br +prtimes.jp +teacherspayteachers.com +rapidgator.net +clean.gg +drexel.edu +lsu.edu +geekwire.com +drugabuse.gov +sydney.edu.au +narvar.com +adverdirect.com +bhg.com +cjb.net +bmo.com +seagate.com +iea.org +stltoday.com +windscribe.com +nhtsa.gov +khaleejtimes.com +nta.go.jp +investors.com +parents.com +docusign.com +myniceposts.com +comodo.com +firstpost.com +brand-display.com +bgr.com +crhoy.com +splashthat.com +clickbank.com +akhbarelyom.com +eum-appdynamics.com +jivox.com +plosone.org +readthedocs.org +ala.org +topuniversities.com +netvibes.com +wilfulpessimistic.com +on24.com +css-tricks.com +labanquepostale.fr +oxforddictionaries.com +asm.org +lazada.com.my +cibc.com +buzzsprout.com +myportfolio.com +sentry.io +jut.su +sznews.com +mxplayer.in +google.rs +wyndhamhotels.com +payscale.com +ssionsupre.fun +cnil.fr +diabetes.org +beeg.com +virtualearth.net +chanel.com +bestbuy.ca +extremetech.com +kenh14.vn +postrelease.com +kansascity.com +esri.com +successfactors.eu +afp.com +pbworks.com +pagesix.com +dailydot.com +so-net.ne.jp +cretgate.com +visitsfunk.com +theblaze.com +intensedebate.com +fang.com +myvisualiq.net +dramacool.so +kaggle.com +apartments.com +foodandwine.com +ebayimg.com +mindmeister.com +cox.net +boardgamegeek.com +nwsource.com +polyfill.io +politifact.com +financialpost.com +prensalibre.com +cmbchina.com +oprah.com +opentable.com +ultipro.com +java.net +pochta.ru +seesaw.me +canada.com +people.cn +wangdaidongfang.com +digiato.com +drive2.ru +ewg.org +tmtpost.com +nola.com +jalopnik.com +smallbiztrends.com +klarna.com +laweekly.com +gd.gov.cn +paris.fr +slack-edge.com +vtv.vn +razer.com +active.com +afreecatv.com +creativebloq.com +mirconnect.ru +collinsdictionary.com +google.com.bd +king.com +elconfidencial.com 
+epicurious.com +bp.com +we.tl +benzinga.com +lbl.gov +nla.gov.au +home.pl +searchsecurer.com +macworld.com +kcl.ac.uk +ocn.ne.jp +poetryfoundation.org +pri.org +laravel.com +sf-express.com +wanfangdata.com.cn +finviz.com +gao.gov +chetor.com +kafan.cn +thespruceeats.com +biblehub.com +yoomoney.ru +loopme.me +icy-veins.com +laposte.fr +law.com +tightsaturdayi.com +jwpltx.com +bonappetit.com +britishmuseum.org +oanda.com +tap.az +bcebos.com +rawstory.com +cargurus.com +vectorstock.com +zozo.jp +thewrap.com +ballotpedia.org +techspot.com +vcu.edu +yamaha.com +seesaa.net +city-data.com +tinkoff.ru +freebitco.in +netsuite.com +tilltucked.com +familysearch.org +plannerladyreality.com +rozetka.com.ua +penzu.com +baskino.me +thinkprogress.org +aftonbladet.se +sf.net +pbskids.org +leetcode.com +marvel.com +getcomposer.org +iol.co.za +puma.com +couchsurfing.com +dstv.com +bb.com.br +disquscdn.com +nexon.com +news-medical.net +cas.cn +officedepot.com +authorize.net +filehorse.com +authorstream.com +socialmediatoday.com +oneindia.com +swissinfo.ch +y8.com +delish.com +artnet.com +rediffmailpro.com +panoramio.com +smore.com +xgo.com.cn +food.com +manta.com +oppositehometowndrunken.com +uvic.ca +jjwxc.net +chinatimes.com +ntu.edu.tw +biorxiv.org +esheeq.co +csiro.au +ndr.de +ui.com +motorola.com +bl.uk +cybozu.com +ranker.com +pinterest.fr +woot.com +siriusxm.com +walmart.ca +xerox.com +u.gg +opencart.com +ring.com +imo.im +medicare.gov +nrdc.org +archive.is +launchdarkly.com +dzone.com +worldwildlife.org +shahid4u.onl +cuhk.edu.hk +sc.edu +windowscentral.com +workable.com +kajabi.com +coub.com +adhigh.net +kinokrad.co +standardmedia.co.ke +nfpa.org +discuss.com.hk +ad4m.at +chatwork.com +reviewjournal.com +tsichuan.com +rookmemorizevoluntary.com +vistaprint.com +caliente.mx +rug.nl +gandi.net +andersnoren.se +softbank.jp +photoshelter.com +chewy.com +cineulagam.com +dickssportinggoods.com +alternet.org +esy.es +bzw315.com +krakow.pl +5118.com +google.kz +headspace.com +geocaching.com +adoptapet.com +wikipedia.com +tentmess.com +imooc.com +189.cn +manchestereveningnews.co.uk +medrxiv.org +askubuntu.com +wikimapia.org +enamad.ir +linecorp.com +helpguide.org +zarinpal.com +dbankcloud.asia +noon.com +baomoi.com +splittingpick.com +cracked.com +theculturetrip.com +sciencenews.org +tradingeconomics.com +fotolia.com +sc.com +haplat.net +letterboxd.com +asp.net +pinterest.es +ontario.ca +cna.com.tw +essayswriting.org +bshare.cn +cpsc.gov +pantone.com +bdurl.net +parliament.uk +igvita.com +bbcgoodfood.com +tvguide.com +jdoqocy.com +lazada.co.id +unrealengine.com +loupan.com +sch.gr +newstatesman.com +smartasset.com +jio.com +in.net +cfr.org +trustedreviews.com +comicbook.com +boeing.com +lufthansa.com +nikkeibp.co.jp +acuityscheduling.com +cntraveler.com +mrporter.com +5acbd.com +listindiario.com +ekaie.com +ingentaconnect.com +royalcbd.com +moonfruit.com +rqmob.com +movieweb.com +kaltura.com +siteorigin.com +vocabulary.com +1377x.to +ericsson.com +spring.io +images-amazon.com +vidible.tv +service-public.fr +similarweb.com +signupgenius.com +bostonherald.com +meti.go.jp +ssc.nic.in +notepad-plus-plus.org +klikbca.com +internetdownloadmanager.com +nypl.org +meduza.io +themuse.com +heytapimg.com +mfisp.com +bayern.de +synology.com +androidauthority.com +andhrajyothy.com +unt.edu +9anime.to +eenadu.net +sdsu.edu +jagranjosh.com +gumtree.com.au +epochtimes.com +dantri.com.vn +otto.de +smashballoon.com +intercom.io +wdr.de +bigthink.com +kuaidi100.com +ncsl.org +seriouseats.com +dividedscientific.com +kinopoisk.ru 
+tesco.com +zee5.com +skypeassets.com +innovid.com +google.lt +sltrib.com +cocolog-nifty.com +2gis.ru +genpi.co +kuleuven.be +hivestreaming.com +surveygizmo.com +wnd.com +ky.gov +dzwww.com +foxitsoftware.com +omnithrottle.com +fnac.com +architecturaldigest.com +xsrv.jp +izooto.com +zoomit.ir +nordvpn.com +lazada.vn +revcontent.com +onmicrosoft.com +bhaskar.com +irna.ir +sdpnoticias.com +desmos.com +googlecode.com +xtx6.com +iata.org +nottingham.ac.uk +aap.org +ilsole24ore.com +kotak.com +waveapps.com +wizards.com +tkqlhce.com +hawaii.gov +time.ir +greenend.org.uk +clever.com +blip.tv +haberler.com +tudelft.nl +calculator.net +tarafdari.com +absher.sa +rayjump.com +biggo.com.tw +theoutnet.com +ruten.com.tw +symfony.com +cdn.house +ameli.fr +ig.com.br +www.gob.mx +gaana.com +zimbra.com +sastasundar.com +adkernel.com +dpbolvw.net +madrasati.sa +goldcarpet.cn +avclub.com +poki.com +society6.com +main.jp +2m.ma +comscore.com +xiu.com +uni-muenchen.de +usaid.gov +brave.com +districtbaloneywhiskers.com +isna.ir +sina.com +elitedaily.com +ethetrader.com +thejakartapost.com +fnb.co.za +iasds01.com +sleepfoundation.org +yammer.com +okcupid.com +ispot.tv +royalmail.com +hotpepper.jp +doswinuba.com +sproutsocial.com +energystar.gov +shopbop.com +tiki.vn +ohio-state.edu +umontreal.ca +diariolibre.com +tiscali.it +ons.gov.uk +comcast.com +erne.co +steemit.com +9978.cn +emerald.com +panda.org +justpaste.it +novinky.cz +heroku.com +bmi.ir +cbcloud.sg +codeproject.com +logmein.com +roozaneh.net +youjizz.com +mailerlite.com +businessnewsdaily.com +google.by +airtable.com +postimg.cc +kqzyfj.com +aadrm.com +rajasthan.gov.in +binomo-website.com +bangkokpost.com +mediapost.com +mostaghelonline.com +blogspot.nl +steamstatic.com +china.org.cn +spanishdict.com +qualcomm.com +unh.edu +wps.cn +toppr.com +kff.org +sxsw.com +slack-imgs.com +kqed.org +s-onetag.com +tfl.gov.uk +stern.de +se.com +9lianmeng.com +screencast.com +fmkorea.com +olx.com.pk +airtel.in +insightexpressai.com +enstage-sas.com +1password.com +mediaset.it +easeus.com +boxofficemojo.com +dream.co.id +privatbank.ua +hermes.com +turnitin.com +aboutamazon.com +adelaide.edu.au +infolinks.com +sears.com +spglobal.com +smashwords.com +pypi.org +nouvelobs.com +trustwave.com +turbo.az +fordham.edu +lastampa.it +exe.app +gjirafa.com +bitcointalk.org +gadgetnews.net +auction.co.kr +safedog.cn +cardinalcommerce.com +paypalobjects.com +ucoz.com +famethemes.com +khtahmar.com +saksfifthavenue.com +fiu.edu +kiplinger.com +volvocars.com +ou.edu +sparknotes.com +pchouse.com.cn +wowkeren.com +google.com.ly +printfriendly.com +footprint.net +blogspot.com.br +bancoestado.cl +360safe.com +filehippo.com +toptal.com +suntrust.com +nghttp2.org +sh.gov.cn +tvtropes.org +specificfeeds.com +ku.dk +wzrkt.com +countryliving.com +olymptrade.com +jezebel.com +amazonvideo.com +hk01.com +activehosted.com +heritage.org +greenhouse.io +fineartamerica.com +apple.com.cn +pastemagazine.com +grab.com +euractiv.com +ghost.org +arzdigital.com +intentiq.com +commondreams.org +docdroid.net +freehostia.com +peta.org +berlin.de +y5en.com +yxdown.com +download.com +bol.com +pdffiller.com +jnu.edu.cn +lever.co +gencat.cat +vancouversun.com +androidcentral.com +fendi.com +quantcast.com +snapwidget.com +indosport.com +messefrankfurt.com +toto803.com +bnf.fr +mayoclinic.com +chinajsq.cn +clck.ru +ssacdn.com +suumo.jp +uni-heidelberg.de +tdbank.com +cda.pl +mongodb.com +adition.com +uu.se +miitbeian.gov.cn +tver.jp +scroll.in +ncl.ac.uk +scopus.com +anandtech.com +inhabitat.com +lkqd.net 
+derstandard.at +storiespace.com +e-planning.net +nobitex.ir +shop-pro.jp +inkscape.org +case.edu +maine.gov +leo.org +umblr.com +myfreecams.com +ppomppu.co.kr +prntscr.com +msu.ru +globaltimes.cn +ml.com +usgbc.org +xataka.com +dangbei.com +pagesperso-orange.fr +icicidirect.com +kargo.com +mxptint.net +alipayobjects.com +qodeinteractive.com +toggl.com +aliapp.org +walkme.com +smartrecruiters.com +gtimg.com +fashionfindday.com +blogspot.jp +whatismyipaddress.com +media6degrees.com +ndrc.gov.cn +ispconfig.org +sas.com +jcpenney.com +rpi.edu +hypebeast.com +marketo.com +vivo.com +miaopai.com +paytm.in +fox.com +myfonts.com +trust.org +bahn.de +mytheresa.com +minds.com +worktile.com +mikecrm.com +schema.org +thriveglobal.com +vanguardngr.com +reliefweb.int +denetsuk.com +revjet.com +esteri.it +12377.cn +business.com +megaupload.com +gesetze-im-internet.de +tripadvisor.in +giglio.com +ladsp.com +yc58.com +marieclaire.com +handle.net +sony.net +farsnews.ir +producthunt.com +extend.tv +hinet.net +seattlepi.com +soap2day.to +lazada.com +nato.int +mcmaster.ca +jmw.com.cn +brainly.in +shopstyle.com +unige.ch +jagran.com +ltn.com.tw +uc.edu +10010.com +tasnimnews.com +ecv360.com +futurelearn.com +gnupg.org +rferl.org +businessinsider.in +nsatc.net +kth.se +ritzcarlton.com +mihoyo.com +nami.org +postermywall.com +spiceworks.com +kremlin.ru +oxu.az +dnaindia.com +starwoodhotels.com +avct.cloud +rezync.com +10jqka.com.cn +sc-static.net +adbtc.top +thinkgeek.com +ximalaya.com +owasp.org +interfax.ru +aporasal.net +r-project.org +gm.com +clemson.edu +geico.com +archlinux.org +ustc.edu.cn +idealo.de +nate.com +nbcnewyork.com +oclc.org +n11.com +dailytelegraph.com.au +365jia.cn +e-monsite.com +greatandhra.com +acrobat.com +akc.org +stlouisfed.org +rae.es +cloudconvert.com +histats.com +nolo.com +edweek.org +pullcm.com +tennessean.com +folkd.com +mof.gov.cn +inquisitr.com +priceline.com +dr.dk +getty.edu +mosreg.ru +dable.io +onetag-sys.com +sports.ru +patagonia.com +webtoons.com +allocine.fr +tilda.ws +alamy.com +unodc.org +likee.video +businessweekly.com.tw +hightail.com +doramy.club +burberry.com +liftoff.io +newsobserver.com +newsnow.co.uk +sanook.com +newswire.ca +tamin.ir +masterclass.com +mango.com +blog.ir +ada.support +vietnamnet.vn +marinetraffic.com +charlotteobserver.com +sportingnews.com +tci.ir +mwbsys.com +marketingland.com +baiducontent.com +smbc-card.com +agacelebir.com +br.de +bldrdoc.gov +piktochart.com +samsungcloudsolution.net +nybooks.com +neimanmarcus.com +onenote.com +vam.ac.uk +bitcoin.com +123-reg-new-domain.co.uk +uwo.ca +lexpress.fr +houstonchronicle.com +ad-stir.com +invisionapp.com +europa.eu.int +lu.se +jpush.io +aralego.com +yjc.ir +zimbio.com +fararu.com +paytm.com +shell.com +kissmetrics.com +animoto.com +suning.com +emeraldinsight.com +elcomercio.pe +sophosupd.com +secondlife.com +infoplease.com +netcraft.com +lichess.org +svc.ms +usu.edu +rightmove.co.uk +vesti.ru +elastic.co +mindbodyonline.com +ready.gov +band.us +collider.com +visualcapitalist.com +york.ac.uk +ua.edu +xiaohongshu.com +edgyconnaterag.com +wn.com +dailyrecord.co.uk +qualys.com +open.ac.uk +pelisplus.me +tsa.gov +dingtalk.com +telekom.com +pornhubpremium.com +wenming.cn +sakshi.com +omegle.com +webinarjam.com +namava.ir +courant.com +wwd.com +nespresso.com +allafrica.com +qatarairways.com +iefimerida.gr +vmall.com +thedrum.com +digiday.com +pullcf.com +tasteofhome.com +123c.vn +takealot.com +eeoc.gov +wargaming.net +state.co.us +dc.gov +d1net.com +1fichier.com +xhamster7.desi +wanadoo.fr +bazaarvoice.com 
+boohoo.com +badoo.com +gatesfoundation.org +unwomen.org +findarticles.com +adtelligent.com +ul.com +encuentra24.com +ana.co.jp +citylab.com +bizrate.com +mobile01.com +mendeley.com +jwplatform.com +torontosun.com +google.co.ao +draftkings.com +monash.edu +itau.com.br +buyma.com +inverse.com +jetpack.com +pmi.org +tribune.com.pk +12371.cn +realtor.ca +st-andrews.ac.uk +gazetaexpress.com +outsideonline.com +antaranews.com +bom.gov.au +ghanaweb.com +daringfireball.net +survata.com +inner-active.mobi +shiksha.com +jumia.com.ng +supersonic.com +tripsavvy.com +viralporn.com +hnu.edu.cn +google.com.do +lamabang.com +lolesports.com +overdrive.com +corpscorp.online +yallakora.com +lancers.jp +cedexis-radar.net +vecteezy.com +luisaviaroma.com +litres.ru +nos.nl +ally.com +mic.com +slashgear.com +podomatic.com +mgtv.com +awwwards.com +econsultancy.com +somoynews.tv +mensjournal.com +zoominfo.com +netcoresmartech.com +wechat.com +battlenet.com.cn +topsy.com +runnersworld.com +scene7.com +mediaroom.com +google.com.ec +bts.gov +admanmedia.com +rapidssl.com +gazzettadelsud.it +aip.org +ipstatp.com +lepoint.fr +redditstatic.com +plala.or.jp +mercadolibre.cl +alsbbora.info +sudannews365.org +archiveofourown.org +inven.co.kr +care2.com +nzz.ch +export.gov +sheknows.com +offerup.com +mvideo.ru +vestacp.com +turner.com +ecollege.com +ru.com +thehindubusinessline.com +forexfactory.com +ello.co +computerhope.com +tum.de +baylor.edu +f-secure.com +gst.gov.in +infoseek.co.jp +adyen.com +getui.com +salesforceliveagent.com +ig.com +kodak.com +dominos.com +openedition.org +4px.com +golang.org +b2clogin.com +51cto.com +morganstanley.com +tagged.com +djangoproject.com +4channel.org +unicode.org +anthropologie.com +townhall.com +doctolib.fr +adrta.com +lendingtree.com +idaho.gov +coinpayu.com +heraldsun.com.au +smzdm.com +ennaharonline.com +voot.com +lightwidget.com +viu.com +20minutes.fr +proboards.com +revenuenetworkcpm.com +sport.es +punchng.com +kiva.org +computerweekly.com +mail.com +danawa.com +almubasher.com.sa +webself.net +gov.ao +american.edu +statefarm.com +skynet.be +zotero.org +openculture.com +follow.it +hpjav.tv +cra-arc.gc.ca +cyberpolice.cn +nyt.com +jalbum.net +trademe.co.nz +fortunecity.com +nhentai.net +marketplace.org +faithfulfacultativeladder.com +alaska.gov +indystar.com +kyoto-u.ac.jp +rt.ru +expressvpn.com +sofascore.com +smu.edu +cint.com +envato.market +newsvine.com +cinecalidad.is +europapress.es +ucm.es +researchnow.com +sharefile.com +cato.org +google.com.kw +democracynow.org +americanprogress.org +pluralsight.com +elintransigente.com +blackboardcdn.com +126.net +cettire.com +de.tl +szu.edu.cn +bepress.com +popmama.com +c212.net +whmcs.com +michaels.com +funnyordie.com +sendinblue.com +tapjoy.com +georgia.gov +certum.pl +mundodeportivo.com +poznan.pl +acpjournals.org +jawapos.com +most.gov.cn +adobeconnect.com +commonsensemedia.org +trthaber.com +iaea.org +iflscience.com +netfirms.com +futurism.com +rutube.ru +tn.gov +alberta.ca +koreatimes.co.kr +citilink.ru +ato.gov.au +adafruit.com +teenvogue.com +winbank.gr +convio.net +socialmediaexaminer.com +france.tv +fastcodesign.com +shape.com +nature.org +inria.fr +dikaiologitika.gr +streamlabs.com +globalresearch.ca +degruyter.com +ub.edu +kobo.com +easyjet.com +redlink.com.ar +guidestar.org +ulta.com +faradars.org +jpmorgan.com +arynews.tv +zju.edu.cn +jit.si +xunlei.com +shafaqna.com +mnn.com +nationalacademies.org +estadao.com.br +oath.cloud +bing.net +thunderbird.net +formula1.com +alibaba-inc.com +cabelas.com +worldoftanks.eu 
+tubemogul.com +upstox.com +brainly.com +o2.pl +childrensalon.com +22.cn +cancerresearchuk.org +mediaite.com +homes.co.jp +gigabyte.com +milenio.com +equifax.com +desmoinesregister.com +easytomessage.com +govtrack.us +cic.gc.ca +audioboom.com +gaadiwaadi.com +msidentity.com +azlyrics.com +cplusplus.com +google.co.ma +threadless.com +express.pk +cheatsheet.com +discordapp.net +indozone.id +nab.com.au +samsungcloudsolution.com +glassdoor.co.in +db.tt +popin.cc +onstunkyr.com +slideserve.com +arbeitsagentur.de +wayne.edu +itunes.com +mix.com +cision.com +radikal.ru +xserver.ne.jp +petapixel.com +studfile.net +crowd1.com +doe.gov +dw.de +gsis.gr +aafp.org +b2b.cn +fireeye.com +like.video +9news.com.au +jal.co.jp +jiemian.com +bible.com +stats.gov.cn +truecaller.com +cimaclub.in +ruliweb.com +kq36.com +govinfo.gov +kissasian.sh +mindtools.com +vogue.co.uk +medcom.id +androidpolice.com +techdirt.com +webull.com +xm.com +samsungdm.com +amgdgt.com +eugdpr.org +argaam.com +eccn.com +ip138.com +oecd-ilibrary.org +ipsos.com +lapatilla.com +kia.com +sqlite.org +themoscowtimes.com +alltrails.com +artsy.net +brealtime.com +bplaced.net +gsk.com +rtactivate.com +zenfolio.com +cn163.net +brainly.co.id +kahoot.com +fmprc.gov.cn +worthpoint.com +digitaloceanspaces.com +annals.org +magento.com +vitalsource.com +brassring.com +dispatch.com +wikitravel.org +informa.com +elperiodico.com +thermofisher.com +io9.com +deseret.com +italist.com +adtimaserver.vn +subito.it +quillbot.com +1plus1tv.ru +pdx.edu +godwineagles.org +syracuse.com +mindbodygreen.com +gxu.edu.cn +planalto.gov.br +otzovik.com +imore.com +tinkercad.com +julian-fashion.com +nga.cn +iyfubh.com +linguee.com +ntu.edu.sg +checkpoint.com +indiaglitz.com +garant.ru +mabanque.bnpparibas +epson.com +ip-api.com +stripe.network +sat.gob.mx +abchina.com +aif.ru +allure.com +bayer.com +postimage.org +privat24.ua +amnh.org +chaoxing.com +fca.org.uk +sciencenet.cn +mt.gov +liu.se +userreport.com +tuwien.ac.at +eldiario.es +s-msedge.net +wbs-law.de +finra.org +makezine.com +rwth-aachen.de +veoh.com +blogspot.se +abc7.com +fudan.edu.cn +inps.it +carbonmade.com +kinsta.com +hpage.com +ccavenue.com +photopea.com +okaz.com.sa +boe.es +e-hentai.org +conac.cn +cox.com +hotjar.io +cinemablend.com +newindianexpress.com +cigna.com +nu.nl +htc.com +keybase.io +crateandbarrel.com +ccm.net +liveonscore.tv +shaw.ca +laleggepertutti.it +izatcloud.net +venmo.com +iqoption.com +auspost.com.au +webshots.com +turkishairlines.com +arabnews.com +paloaltonetworks.com +trilltrill.jp +foreignaffairs.com +delaware.gov +ontvtime.ru +contentmarketinginstitute.com +piliapp.com +google.lv +ali213.net +mingpao.com +capterra.com +nv.gov +tf1.fr +wfp.org +d1ev.com +blogspot.ru +ovh.co.uk +opera-mini.net +hasbro.com +tapatalk.com +gigya.com +youzan.com +zoosnet.net +allstate.com +chinagate.cn +wnyc.org +wowma.jp +buyma.us +kroger.com +impots.gouv.fr +xboxab.com +valuecommerce.com +infourok.ru +adskeeper.com +wps.com +mega.co.nz +uab.edu +undertone.com +smartinsights.com +pfizer.com +lexico.com +exeter.ac.uk +lyft.com +ibanking-services.com +taroot-rangi.com +google.iq +telenet.be +republicworld.com +pantheonsite.io +iam.gov.sa +uzh.ch +rokna.net +worldstarhiphop.com +kongregate.com +google.com.ng +searchmulty.com +pushails.com +collabserv.com +a-mo.net +cuni.cz +cincinnati.com +mofa.go.jp +tau.ac.il +westerndigital.com +pinterest.jp +gazzetta.it +emailmeform.com +mob.com +spectator.co.uk +poynter.org +malavida.com +consumerfinance.gov +auburn.edu +europepmc.org +ada.org +uib.no +7po.com 
+bonanza.com +tenor.co +myflorida.com +dytt8.net +popdaily.com.tw +edupage.org +nationalgeographic.org +peacocktv.com +emb-japan.go.jp +popbela.com +purevolume.com +techweb.com.cn +mlit.go.jp +syosetu.com +ccaonline.cn +vuejs.org +yr.no +nttdocomo.co.jp +usask.ca +66cruises.com +fullhdfilmizlesene.com +newgrounds.com +aspca.org +ai.marketing +ktla.com +broadwayworld.com +kit.edu +techtimes.com +sc-prod.net +beatport.com +flippingbook.com +goldmansachs.com +tim.it +docomo.ne.jp +domestika.org +uptodate.com +campaignmonitor.com +nsdl.com +storyblocks.com +ezgif.com +progressive.com +mercedes-benz.com +sdamgia.ru +toasttab.com +brunarosso.com +metoffice.gov.uk +domdex.com +pandasecurity.com +timesunion.com +marksandspencer.com +ichano.cn +web-hosting.com +b-cdn.net +weiyun.com +sportskeeda.com +urbanairship.com +instyle.com +tidal.com +harpercollins.com +overleaf.com +corporatefinanceinstitute.com +hh010.com +wsimg.com +deviantart.net +unibo.it +plywoodenchant.com +nflximg.net +toyokeizai.net +go2cloud.org +brightspace.com +itslearning.com +gleam.io +icrc.org +muffingroup.com +webopedia.com +thumbtack.com +qoo10.jp +sheffield.ac.uk +capgemini.com +casetify.com +scitation.org +caixabank.es +technet.com +freemake.com +telugu360.com +wmo.int +williams-sonoma.com +shifen.com +electrek.co +syfy.com +mercadopago.com.ar +jsfiddle.net +prevention.com +auckland.ac.nz +setare.com +alaska.edu +spacex.com +v-mate.mobi +geistm.com +nhm.ac.uk +repec.org +utwente.nl +president.gov.ua +getcourse.ru +okstate.edu +vedomosti.ru +monbin.site +kraken.com +wordhippo.com +edgecastcdn.net +saednews.com +ahrq.gov +csoonline.com +zamzar.com +gamasutra.com +documentforce.com +mymodernmet.com +harrods.com +obsproject.com +admitad.com +magazineluiza.com.br +eweek.com +netlify.com +suicidepreventionlifeline.org +cbr.com +gameforge.com +cncn.org.cn +rijksoverheid.nl +hrloo.com +nutrition.org +statesman.com +appboy.com +amtrak.com +lacounty.gov +sage.com +playstation.net +ornl.gov +rki.de +westpac.com.au +gao7.com +yaplakal.com +gsxt.gov.cn +blogsky.com +dxy.cn +smallseotools.com +mojifen.com +alistapart.com +visymo.com +bicentenariobu.com +fragrantica.com +skrill.com +ces.tech +pbc.gov.cn +magodasimagens.com.br +tencent-cloud.net +20min.ch +masterpapers.com +csulb.edu +kent.edu +storeboard.com +cognitivlabs.com +agenziaentrate.gov.it +krebsonsecurity.com +microsoft.net +soton.ac.uk +tcd.ie +bundestag.de +ringcentral.com +testbook.com +worldoftanks.ru +nintendo.net +bozhong.com +sphinx-doc.org +ichano.com +cian.ru +hsbc.com +macmillan.com +openlibrary.org +nrich.ai +googleoptimize.com +gismeteo.ua +miniclip.com +radio-canada.ca +rikunabi.com +www.gov.pl +xuexi.cn +smartclip.net +deseretnews.com +eporner.com +uni-hamburg.de +rsf.org +svt.se +gsa.gov +mobirise.info +ko-fi.com +cityu.edu.hk +rs-online.com +univision.com +aircanada.com +licindia.in +index-education.net +icloud.com.cn +friv.com +99designs.com +freshbooks.com +fairmont.com +cbinsights.com +delgarm.com +jeuxvideo.com +ampproject.net +bestwestern.com +newtalk.tw +art.pl +eventbrite.ca +wtop.com +uproxx.com +laopm.com +callofduty.com +jxmall.com +healthgrades.com +pewtrusts.org +nai.com +cisa.gov +pinterest.com.au +nationaltrust.org.uk +digitalspy.com +locationsreverenceaid.com +mn.gov +storygize.net +fishki.net +microsoftazuread-sso.com +goibibo.com +dlsite.com +newsit.gr +openlearning.com +wrike.com +thalesgroup.com +multiurok.ru +consumeraffairs.com +payforessay.net +torrentfreak.com +deadspin.com +hik-connect.com +smbc.co.jp +etonline.com +prada.com 
+gothamist.com +cba.pl +doramatv.live +playground.xyz +sunsu521.com +isi.edu +wvu.edu +mi-img.com +anticheatexpert.com +steinberg.net +smm.cn +amazonalexa.com +airbus.com +dupont.com +humanesociety.org +postimages.org +verywellfit.com +duniagames.co.id +directredirection.com +academic.ru +ipify.org +16personalities.com +weightwatchers.com +medicaldaily.com +wisegeek.com +revopush.com +webcindario.com +douyincdn.com +dv37.com +findagrave.com +commentcamarche.net +hackerrank.com +usabilla.com +pdf2go.com +healthychildren.org +le.ac.uk +bbwhf.com +doaj.org +mseav.com +galacticmenueasier.com +tomtom.com +theathletic.com +johnlewis.com +transparency.org +ucsusa.org +annualreviews.org +brownpapertickets.com +zingmp3.vn +jnj.com +netteller.com +which.co.uk +rai.it +nyti.ms +vodafone.de +fx678.com +pocket-lint.com +viadeo.com +motortrend.com +knightlab.com +spreadshirt.com +delltechnologies.com +crisp.chat +dailywire.com +csic.es +aralego.net +wpbeginner.com +caichongwang.com +wral.com +pagina12.com.ar +raider.io +iucn.org +sublimetext.com +doorblog.jp +audacityteam.org +corporate-ir.net +wowkorea.jp +mhthemes.com +google.si +sierraclub.org +patheos.com +corel.com +alfavita.gr +xoom.com +ucar.edu +carleton.ca +tbs.co.jp +petfinder.com +thediplomat.com +hse.gov.uk +imgflip.com +doc.gov +opensecrets.org +diplo.de +dmxleo.com +haveibeenpwned.com +beinsports.com +gnavi.co.jp +adtdp.com +zxxk.com +popcash.net +wisconsin.gov +polldaddy.com +unext.jp +talkingpointsmemo.com +ikco.ir +ga.gov +sfsu.edu +pelisplushd.net +ithome.com +g2a.com +lyst.com +sagawa-exp.co.jp +canadiantire.ca +web.com +wkzuche.com +lequipe.fr +uni-koeln.de +argos.co.uk +vidazoo.com +xuite.net +hupu.com +hu-berlin.de +de17a.com +phonearena.com +iarc.fr +choicehotels.com +mizuhobank.co.jp +domain.com +ghostery.com +dur.ac.uk +subway.com +mosaiquefm.net +loewe.com +project-syndicate.org +japantoday.com +tubecup.net +sparkfun.com +webflow.com +ssllabs.com +mint.com +prlog.org +unacademy.com +oppo.com +fanlibang.com +ebaumsworld.com +uottawa.ca +aruba.it +las2orillas.co +clarium.io +wantedly.com +dal.ca +grammy.com +health.gov.au +hudong.com +icons8.com +7gz.com +roboform.com +google.com.qa +sjsu.edu +worldatlas.com +eleconomista.es +secu100.com +gemius.pl +moengage.com +buffalonews.com +trezor.io +beauty321.com +bigolive.tv +hackaday.com +template.net +px-cloud.net +tineye.com +linode.com +mangakakalot.com +yfrog.com +lin.ee +tabnak.ir +flagcounter.com +saic.gov.cn +fontawesome.io +ooopic.com +adlightning.com +ugent.be +adme.ru +pennlive.com +daserste.de +wyborcza.pl +weddingwire.com +99acres.com +books.com.tw +ilmeteo.it +getgo.com +concordia.ca +redgifs.com +unctad.org +miwifi.com +yidianzixun.com +uplynk.com +extendthemes.com +kbs.co.kr +mozillazine.org +4399.com +haber7.com +hsforms.com +4pda.ru +ole.com.ar +srf.ch +geforce.com +acast.com +provincial.com +speedyloan.net +commonapp.org +techtudo.com.br +grist.org +gale.com +qvc.com +playbuzz.com +no-ip.org +regulations.gov +wwe.com +etsystatic.com +tucows.com +myapp.com +pcpartpicker.com +collegehumor.com +jpn.org +maine.edu +exacttarget.com +moe.gov.sa +google.tn +villagevoice.com +microad.jp +northjersey.com +verajohn.com +linksys.com +5i8xkqjmqubv.top +nabble.com +vimeocdn.com +sascdn.com +batds.net +post.ir +publishersweekly.com +epik.com +pewsocialtrends.org +dmkt-sp.jp +greatist.com +umt.edu +mparticle.com +jobvite.com +exploratorium.edu +potterybarn.com +innity.com +movember.com +tv-tokyo.co.jp +ensighten.com +trustx.org +hea.cn +tommy.com +karger.com +christies.com 
+tongdun.net +olx.com.eg +admicro.vn +mohurd.gov.cn +rmit.edu.au +rooziato.com +mynewsdesk.com +simplemachines.org +bristol.ac.uk +ardmediathek.de +klm.com +mobirise.com +nbg.gr +clickagy.com +theweathernetwork.com +pravda.ru +petitiononline.com +wdc.com +btloader.com +nrw.de +cochrane.org +pathofexile.com +yourstory.com +merchantcircle.com +belfasttelegraph.co.uk +jinshuju.net +uploaded.net +ladsp.jp +dev.to +xueqiu.com +appdynamics.com +ushmm.org +hln.be +rapidtables.com +nec.com +excite.com +autonavi.com +guidechem.com +montrealgazette.com +rivals.com +betanews.com +credit-suisse.com +infoq.com +e2ma.net +secureinternetbank.com +housebeautiful.com +te.eg +pen.io +mirtesen.ru +wri.org +picuki.com +gdz.ru +tu-berlin.de +sonypictures.com +eroterest.net +dg-datenschutz.de +pearsonvue.com +regnum.ru +bankbazaar.com +gridserver.com +graphicriver.net +jimdosite.com +jcrew.com +tbcache.com +getsatisfaction.com +csun.edu +us-cert.gov +taz.de +abc7news.com +tom.com +vidal.fr +sundaysky.com +elcorteingles.es +contently.com +guancha.cn +cnr.it +uwa.edu.au +monash.edu.au +appannie.com +gameinformer.com +elbotola.com +umbc.edu +ap.gov.in +masslive.com +superbthemes.com +intercomcdn.com +uni-bonn.de +ahnlab.com +nar.realtor +i.ua +trafficfactory.biz +jagonews24.com +publpush.com +rutor.info +argentina.gob.ar +uschamber.com +colourlovers.com +utm.edu +dailyherald.com +cgtn.com +bumlam.com +libguides.com +httpwg.org +windowsreport.com +reactjs.org +cyberleninka.ru +xitek.com +timesnownews.com +nrf.com +tor.com +periscope.tv +urldefense.com +56.com +9876ydd.com +01net.com +test.com +opendemocracy.net +warriorplus.com +walesonline.co.uk +paho.org +yesky.com +belkin.com +christianitytoday.com +ficbook.net +marxists.org +zulily.com +samsunghealth.com +intipseleb.com +winamp.com +random.org +fandango.com +bundesregierung.de +askmen.com +load24.biz +ssense.com +nbclosangeles.com +onlinehome.us +tokyo.lg.jp +splcenter.org +activision.com +wsdvs.com +bdnews24.com +at.ua +iht.com +tue.nl +upm.es +techacademy.jp +blastcahs.com +ofhappinyer.com +adobess.com +businesstoday.com.tw +xg4ken.com +iloveimg.com +macleans.ca +dynamics.com +directv.com +amazon.sa +liverpoolecho.co.uk +moe.edu.cn +blibli.com +simonandschuster.com +lexology.com +imotech.tech +odu.edu +basf.com +gos-gsp.io +kontur.ru +msn.cn +autonews.com +pagesjaunes.fr +nationalinterest.org +pirate-bay.net +segmentfault.com +rtbf.be +archives-ouvertes.fr +powerlinks.com +niuche.com +piriform.com +vademecum.es +shef.ac.uk +biz.ua +hrblock.com +8tracks.com +theringer.com +mgmresorts.com +wonderhowto.com +techinasia.com +jugem.jp +ionos.de +physiology.org +im-apps.net +namesilo.com +worksmobile.com +programiz.com +nh.gov +audubon.org +xml-sitemaps.com +irecommend.ru +cloudflareinsights.com +komonews.com +sonos.com +navyfederal.org +tulane.edu +lapresse.ca +thomasnet.com +fout.jp +hover.com +nessma.tv +sling.com +lihkg.com +hse.ru +acronis.com +gizmag.com +fdic.gov +yudu.com +nxp.com +matchesfashion.com +avgle.com +palmbeachpost.com +aetna.com +localbitcoins.com +arcot.com +iranserver.com +e-estekhdam.com +star-telegram.com +ctfassets.net +sbicard.com +uwm.edu +etnet.com.hk +dreamhosters.com +escapefromtarkov.com +depaul.edu +annualcreditreport.com +teslamotors.com +pro-market.net +tsyndicate.com +shinobi.jp +computerbild.de +fb.watch +repl.it +vrt.be +permutive.com +adorama.com +chevrolet.com +square-enix.com +tirto.id +rundsp.com +omny.fm +bounceexchange.com +magcloud.com +connexity.net +msocsp.com +citibankonline.com +anz.com +sz.gov.cn +runescape.wiki 
+microfocus.com +xtgem.com +rarathemes.com +madmimi.com +l-msedge.net +brobible.com +microchip.com +dict.cc +lumenlearning.com +linux.com +edmodo.com +guifun.com +symbolab.com +worldfcdn.com +courier-journal.com +domain.com.au +mts.ru +hol.es +tnt.com +court.gov.cn +sportzwiki.com +carfax.com +liveabout.com +iconfinder.com +awin.com +nuance.com +cbn.com +aol.de +onliner.by +ofweek.com +plannedparenthood.org +olx.ro +pch.com +unbounce.com +flightaware.com +bitmoji.com +ntv.ru +eastdane.com +daveramsey.com +bloombergquint.com +g2.com +gitee.com +qianlong.com +tinypng.com +milb.com +pymnts.com +piojm.tech +yahoo.com.tw +lianjia.com +thedrive.com +reverb.com +emol.com +surrey.ac.uk +neoldu.com +polimi.it +saude.gov.br +hongkiat.com +gigafile.nu +appsto.re +adn.com +newsmth.net +parade.com +almaany.com +samsungqbe.com +fumail.de +ri.gov +ria.com +devpost.com +sumo.com +rian.ru +google.com.uy +mikrotik.com +ackcdn.net +gcloudcs.com +vu.nl +eqads.com +vlive.tv +promotional-concepts.de +visitbeijing.com.cn +law360.com +viator.com +lasvegassun.com +fujitv.co.jp +mohrss.gov.cn +nation2.com +kapook.com +madison.com +ftchinese.com +wmgtr.com +labirint.ru +justwatch.com +uoguelph.ca +mtu.edu +koreaherald.com +factcheck.org +aftership.com +spotxcdn.com +wallethub.com +baike.com +zohopublic.com +weixinyunduan.com +e-taxes.gov.az +rarlab.com +jamieoliver.com +wetter.com +rr.com +turbobit.net +google.jo +flyme.cn +diabetesjournals.org +sharepointonline.com +googlezip.net +thenounproject.com +sm.cn +earthday.org +lenovo.com.cn +mysanantonio.com +emag.ro +pof.com +nd.gov +sfweekly.com +jdsports.com +blackmagicdesign.com +tanx.com +smartthings.com +expansion.com +rbxcdn.com +huji.ac.il +sankei.com +elementor.com +the-scientist.com +militarytimes.com +oas.org +ccgp.gov.cn +onamae.com +player.fm +sendgrid.com +meltwater.com +gu.se +dothome.co.kr +counterpunch.org +emptyhammock.com +seattle.gov +liveperson.net +nine.com.au +tensorflow.org +carwale.com +duba.com +9to5google.com +forever21.com +twoo.com +7k7k.com +ttu.edu +birminghammail.co.uk +pptv.com +lung.org +lge.com +nation.africa +db.com +adtechus.com +freenet.de +pressherald.com +randomhouse.com +europeana.eu +glosbe.com +avvo.com +lightinthebox.com +ethereum.org +sucuri.net +ushareit.com +pg.com +ddns.net +xmsecu100.net +triblive.com +weidian.com +qmul.ac.uk +jamendo.com +townandcountrymag.com +ename.com +makeleio.gr +coolors.co +brother.com +wur.nl +sii.cl +friendfeed.com +balenciaga.com +nazwa.pl +ahram.org.eg +fun48.com +streamyard.com +parsely.com +kuaishou.com +serverfault.com +js.org +abplive.com +hrc.org +simplecast.com +filmaffinity.com +tuoitre.vn +gotporn.com +usajobs.gov +twincities.com +videoplayerhub.com +tomsk.ru +bris.ac.uk +ctobsnssdk.com +desdev.cn +masrawy.com +cmcm.com +linux.org +xinmin.cn +foodnetwork.co.uk +umanitoba.ca +astm.org +tiu.ru +crazygames.com +sfexaminer.com +azurefd.net +google.org +sans.org +dtscout.com +diamond.jp +techopedia.com +telekom.de +taringa.net +presslogic.com +zety.com +banamex.com +bao315.com +uark.edu +nbcbayarea.com +drtuber.com +ambafrance.org +localytics.com +nga.gov +vermont.gov +edutopia.org +mihanblog.com +lockerdome.com +transunion.com +aei.org +nyse.com +blackplanet.com +multiply.com +hs-scripts.com +timesofmalta.com +rockcontent.com +fanpop.com +king5.com +digikey.com +yougov.com +greatschools.org +taylorandfrancis.com +elyamnelaraby.com +miaozhen.com +ad-delivery.net +uk.net +dndbeyond.com +nc.gov +pstatic.net +khabaronline.ir +au.dk +pronews.gr +warriorforum.com +dutchtracking.nl +customs.gov.cn 
+9game.cn +nmsu.edu +nintendo.co.jp +eurobricks.com +imgbb.com +bbt.com +spotifycdn.com +ck12.org +cognitoforms.com +mathsisfun.com +westelm.com +llnwd.net +whois.com +ulaval.ca +thejournal.ie +hebnews.cn +compuserve.com +youradchoices.ca +paylocity.com +bbci.co.uk +albany.edu +bain.com +gobizkorea.com +computer.org +cedexis.com +feedbooks.com +hs-banner.com +fullstory.com +belgium.be +logi.com +hostinger.com +maribacaberita.com +medpagetoday.com +bankofengland.co.uk +springeropen.com +deccanherald.com +rp-online.de +brainpickings.org +fetlife.com +ryerson.ca +rockpapershotgun.com +hs-analytics.net +kalerkantho.com +mixdrop.co +rebrand.ly +jiji.com +pangle.io +virgin.com +tiny.cloud +iowa.gov +cebnet.com.cn +ufrj.br +dashlane.com +poets.org +rbc.com +podio.com +sci-hub.se +uni-freiburg.de +mattel.com +razorpay.com +haraj.com.sa +wacom.com +jetblue.com +underarmour.com +brandeis.edu +break.com +tu-dresden.de +copyblogger.com +filmibeat.com +airasia.com +aftodioikisi.gr +sil.org +bankcomm.com +hrsa.gov +luc.edu +healthaffairs.org +fontsquirrel.com +backlinko.com +articulate.com +viifax.com +cleantechnica.com +neea.edu.cn +auswaertiges-amt.de +ntv.com.tr +csis.org +bitrix24.ru +bethesda.net +olark.com +secureservercdn.net +schneider-electric.com +lululemon.com +fortinet.com +bytedance.com +genial.ly +thomann.de +blick.ch +justpremium.com +slides.com +keio.ac.jp +immobilienscout24.de +giantbomb.com +cinarra.com +conta.cc +cybersource.com +23andme.com +mediamarkt.de +echoroukonline.com +allthingsd.com +unibe.ch +mundosexanuncio.com +hostelworld.com +nice.org.uk +mbc.net +honey.io +haoyer.com +offerimage.com +geek.com +wjx.cn +online.fr +telerik.com +freedesktop.org +qut.edu.au +sc.gov +sejda.com +networking.apple +ivoox.com +calm.com +tenki.jp +xuexila.com +cyberchimps.com +readwrite.com +telegraaf.nl +gouvernement.fr +techpowerup.com +online-metrix.net +coocan.jp +emc.com +9news.com +sussex.ac.uk +nat.gov.tw +whu.edu.cn +citibank.com +feishu.cn +dailynews.com +du.edu +tyc.edu.tw +gva.es +hltv.org +cardekho.com +opentracker.xyz +aepd.es +byteoversea.net +thepetitionsite.com +thedonald.win +faceit.com +bseindia.com +iupui.edu +anl.gov +thechive.com +bt.cn +levi.com +pixieset.com +iom.int +highsnobiety.com +adac.de +food52.com +thoughtcatalog.com +hqporner.com +utdallas.edu +freesound.org +editorialmanager.com +latercera.com +adswizz.com +freeprivacypolicy.com +dislanelibrar.top +classdojo.com +ernet.in +shorturl.at +lanzous.com +umengcloud.com +thefederalist.com +woothemes.com +bouncex.net +publicradio.org +coupons.com +securecafe.com +webatam.com +depositfiles.com +arabi21.com +webrootcloudav.com +list.ly +reson8.com +dilbert.com +irfanview.com +jma.go.jp +yicai.com +wm.edu +dreamwidth.org +quicksprout.com +direct.gov.uk +tiaomu.com +lovetoknow.com +aacrjournals.org +yimg.jp +singular.net +adx1.com +shopifysvc.com +acfun.cn +oxfam.org +dagbladet.no +malwarebytes.org +health.gov +tuko.co.ke +snu.ac.kr +wroc.pl +filmkovasi.org +178.com +newsbreak.com +telstra.com.au +cjr.org +jwpsrv.com +ferrari.com +exe.io +usyd.edu.au +bose.com +kathimerini.gr +act.org +kartra.com +mozit.cloud +passportindia.gov.in +bloglines.com +nagaswap.org +niche.com +qidian.com +bell.ca +atom.io +glaz.tv +charitynavigator.org +netbk.co.jp +join.chat +nsc.org +caringbridge.org +21cn.com +christianpost.com +sbixby.com +kapitalbank.az +celine.com +redis.io +audiomack.com +leagueofgraphs.com +powerschool.com +harley-davidson.com +aawsat.com +ksosoft.com +mongabay.com +jahannews.com +adl.org +bloomingdales.com +google.ba 
+tdscpc.gov.in +feedingamerica.org +hotmo.org +stockcharts.com +thelocal.se +livedoor.biz +unece.org +hellomagazine.com +sahamyab.com +netbeans.org +nordstromrack.com +ripe.net +interactivebrokers.com +720yun.com +ismedia.jp +rthk.hk +alaskaair.com +mq.edu.au +buzzsumo.com +stripes.com +onlinemektep.org +treasuredata.com +guggenheim.org +clien.net +camfrog.com +fubo.tv +index.hr +beforeitsnews.com +pjmedia.com +dw-world.de +daimler.com +zj.gov.cn +mymovies.it +pusher.com +ipage.com +all-free-download.com +toshiba.com +lanl.gov +nbcwashington.com +camsloveaholics.com +alternativeto.net +mobilesystemservice.com +stonybrook.edu +irishexaminer.com +unlv.edu +bath.ac.uk +transbank.cl +sendo.vn +le360.ma +ef.com +powells.com +xiazaiba.com +spotim.market +parcelsapp.com +otomoto.pl +travelocity.com +de.vu +centurylink.com +bayt.com +gumtree.com +wework.com +indymedia.org +thehackernews.com +manutd.com +obozrevatel.com +rencaijob.com +theinquirer.net +expressen.se +ust.hk +chocolateplatform.com +yahoosmallbusiness.com +alfabank.ru +bizographics.com +cmegroup.com +hulu.jp +wykop.pl +downdetector.com +datatables.net +care.com +innity.net +bjx.com.cn +dazn.com +tvnz.co.nz +gayboystube.com +nearpod.com +modesens.com +cheqzone.com +web.dev +newschool.edu +hi5.com +roll20.net +livetv.sx +ustr.gov +sinoptik.ua +nbcchicago.com +imoim.app +khaberni.com +crn.com +msstate.edu +freeservers.com +mediavine.com +virtual.edu.az +questionpro.com +dokuwiki.org +simplyhired.com +scotland.gov.uk +thenews.com.pk +termsfeed.com +unr.edu +beniculturali.it +inews.co.uk +prensa-latina.cu +akurat.co +swr.de +uni-saarland.de +ibps.in +c2.com +onlinekhabar.com +lucidchart.com +canoe.ca +yalla-shoot.com +nginx.net +transandfiestas.ga +rogerebert.com +brightbrides.net +llink.site +zb.com +sonyentertainmentnetwork.com +sonymobile.com +concursolutions.com +edf.fr +springserve.com +hurriyetdailynews.com +qhimg.com +mxpnl.com +apxlv.com +thanhnien.vn +hankyung.com +uri.edu +packtpub.com +aarth.net +duomai.com +ok.gov +bancosantander.es +lublin.pl +mediamatters.org +basketball-reference.com +tigris.org +cutt.us +inep.gov.br +legalzoom.com +ralphlauren.com +up.pt +unenvironment.org +tanea.gr +site.com +1dmp.io +streetinsider.com +santander.com.ar +upworthy.com +courthousenews.com +sportbox.ru +surveymonkey.co.uk +autismspeaks.org +q4cdn.com +ssg.com +neowin.net +ukri.org +navercorp.com +psmag.com +nhc.gov.cn +au.com +ad.nl +linuxmint.com +synxis.com +google.com.sv +swarthmore.edu +rp5.ru +linuxfoundation.org +stores.jp +urban.org +filmmodu.org +nau.edu +redditmedia.com +fpdf.org +yandex.kz +37signals.com +asiaone.com +us.es +photo-ac.com +alpha.gr +talent.com +sawbrokers.com +webstarts.com +nikkansports.com +grademiners.com +whatismyip.com +webgains.com +texastribune.org +aade.gr +wikimediafoundation.org +walla.co.il +epi.org +poloniex.com +demandbase.com +chipotle.com +montana.edu +5w52.com +rubyonrails.org +accesspressthemes.com +ottawacitizen.com +wildapricot.org +questia.com +omaha.com +dnspod.cn +ewaybillgst.gov.in +qunar.com +animevnn.com +stoloto.ru +freepdfconvert.com +transportation.gov +garanteprivacy.it +iqilu.com +orbitz.com +frontapp.com +skyscanner.net +ntnu.no +barchart.com +uned.es +uidaho.edu +serverbid.com +ac-illust.com +jusbrasil.com.br +vsco.co +newsru.com +intechopen.com +netpnb.com +all4webs.com +videohub.tv +pewforum.org +google.com.mm +123milhas.com +ice.gov +newsok.com +onedio.com +coremail.cn +southernliving.com +cryptotabbrowser.com +dlink.com +moneysavingexpert.com +intercom.com +mindspring.com +eol.cn 
+carriersignal.info +abc7chicago.com +wordfence.com +ctv.ca +ksu.edu.sa +cultofmac.com +iau.ac.ir +ada.gov +jqueryui.com +amung.us +wonderplugin.com +duosecurity.com +blubrry.com +wmtransfer.com +fresherslive.com +slashfilm.com +uchile.cl +libgen.rs +ckeditor.com +ksord.com +xvideos-cdn.com +squarespace-cdn.com +mdr.de +ttvnw.net +aeon.co +timeslive.co.za +unfpa.org +cmail20.com +venngage.com +blm.gov +psychologicalscience.org +muckrack.com +adliran.ir +swarovski.com +gittigidiyor.com +sothebys.com +imagebam.com +indiapost.gov.in +laptopmag.com +hertz.com +merck.com +volkskrant.nl +tok2.com +raiplay.it +oaspapps.com +forbes.ru +sunat.gob.pe +abc7ny.com +scu.edu +abola.pt +open.edu +fextralife.com +bizcommunity.com +caesars.com +bittrex.com +ugr.es +journaldunet.com +eurobank.gr +mozdev.org +dropbox-dns.com +rtl.de +upf.edu +cloudways.com +jdsports.co.uk +shapeways.com +cognizant.com +tweakers.net +gazzetta.gr +educause.edu +clicky.com +uni-frankfurt.de +uni-stuttgart.de +scienceblogs.com +cpx.to +togetter.com +duden.de +3ds.com +globalsecurity.org +wiocha.pl +ecowatch.com +pinterest.ru +tcs.com +ilfattoquotidiano.it +umu.se +google.ee +cogocast.net +photo.net +rogers.com +youla.ru +typingclub.com +xanga.com +clubic.com +deutsche-bank.de +screencast-o-matic.com +8m.com +daumcdn.net +24s.com +radiotimes.com +bannersnack.com +mizbanfa.net +auth.gr +uts.edu.au +hjenglish.com +alaraby.co.uk +panorama.com.al +sportradar.com +lodz.pl +trueleadid.com +lpages.co +google.com.np +kingston.com +sweetwater.com +dow.com +fontspace.com +chicagobusiness.com +dice.com +kerala.gov.in +creative.com +elfinanciero.com.mx +pcauto.com.cn +maybank2u.com.my +cafef.vn +themify.me +sc-gw.com +jhsph.edu +wordpressfoundation.org +tianyancha.com +oscars.org +deref-gmx.net +toronto.ca +suite101.com +psychiatryonline.org +nii.ac.jp +coca-colacompany.com +uhc.com +st.com +betfair.com +questrade.com +jmty.jp +blogfreely.net +southampton.ac.uk +getfvid.com +bis.org +oricon.co.jp +spectrum.com +createspace.com +spoti.fi +thinglink.com +mtlnovel.com +hypermart.net +commerzbank.de +wetteronline.de +ascd.org +rollcall.com +ae.com +adb.org +bottegaveneta.com +paychex.com +idealmedia.io +bd-pratidin.com +campaignlive.co.uk +freefiremobile.com +91mobiles.com +pgatour.com +yunaq.com +filgoal.com +allhugefeed.com +curtin.edu.au +camp-fire.jp +arkansas.gov +mid.ru +sail-horizon.com +nrel.gov +telugustop.com +deutschlandfunk.de +outlookindia.com +swiftkey.com +sitejabber.com +google.com.cu +ciscospark.com +jalan.net +bravotv.com +haomaner.com +bamboohr.com +servenobid.com +sportsurge.net +blogimg.jp +tv-asahi.co.jp +webofknowledge.com +vzwwo.com +theodysseyonline.com +vaticannews.va +lockheedmartin.com +optimizesrv.com +huamu.cn +114la.com +nju.edu.cn +quovadisglobal.com +rafflecopter.com +anadolu.edu.tr +hbx.com +exlibrisgroup.com +theroot.com +govtech.com +netmng.com +thedenverchannel.com +fau.edu +propellerads.com +embedly.com +ssisurveys.com +boots.com +richaudience.com +xx3.kz +singaporeair.com +infogr.am +shueisha.co.jp +pydata.org +google.com.gt +tvp.pl +dubizzle.com +mastercard.us +education.com +homeaffairs.gov.au +ic3.gov +iucnredlist.org +tsite.jp +clmbtech.com +yemek.com +transportr.io +noodlemagazine.com +freedownloadmanager.org +google.co.cr +elsalvador.com +thepointsguy.com +consumerist.com +chalmers.se +eetimes.com +programme-tv.net +oneplus.com +zenwriting.net +aaxads.com +caisse-epargne.fr +snapdeal.com +nic.ir +baishan-cloud.net +gamesindustry.biz +aalto.fi +therecipecritic.com +ntv.co.jp +valuecommerce.ne.jp 
+1push.io +efukt.com +indiebound.org +slidesgo.com +tendawifi.com +asme.org +who.is +rxlist.com +homedepot.ca +snap.com +cq.gov.cn +ynetnews.com +51y5.net +aad.org +510hr.com +unistra.fr +liverpool.com.mx +lpsnmedia.net +abcnews.com +rabbitpre.com +die.net +dion.ne.jp +freedomhouse.org +draxe.com +usg.edu +wz.cz +1mg.com +calpoly.edu +baidustatic.com +thisismoney.co.uk +appfolio.com +vc.ru +freshchat.com +sipo.gov.cn +rzd.ru +later.com +en25.com +barclays.co.uk +michelin.com +cirquedusoleil.com +astrologyanswers.com +brightside.me +ntlworld.com +feebee.com.tw +hiido.com +sporcle.com +k-state.edu +pokemon.com +amzn.com +golem.de +igamecj.com +cafelog.com +tweaktown.com +microcenter.com +leroymerlin.ru +business.gov.au +thenorthface.com +gib.gov.tr +fbdown.net +pair.com +edgecastdns.net +wsbtv.com +kddi.com +arm.com +comixology.com +xmtrading.com +zebra.com +sitesell.com +gigazine.net +userbenchmark.com +bradesco.com.br +alwakeelnews.com +uni-muenster.de +kubernetes.io +dailypost.ng +leafly.com +commerce.gov +magicbricks.com +keepa.com +ee.co.uk +goodmorningamerica.com +doc88.com +novayagazeta.ru +bloomsbury.com +donaldjtrump.com +visme.co +eaton.com +uni-tuebingen.de +reading.ac.uk +hi-ho.ne.jp +linternaute.com +helpscout.net +aleassbun.site +ebay.es +uni-leipzig.de +pajak.go.id +chez.com +milanuncios.com +torgi.gov.ru +louisville.edu +google.com.bo +connectad.io +eiu.com +hket.com +avma.org +picmonkey.com +truthout.org +heraldscotland.com +kinogo.biz +yousendit.com +freeserve.co.uk +sympatico.ca +cnsnews.com +mozilla-europe.org +paulgraham.com +sport-express.ru +ubnt.com +fark.com +gamedog.cn +postandcourier.com +eatright.org +aolcdn.com +elespectador.com +birmingham.ac.uk +fodors.com +mail-archive.com +ency-education.com +abc13.com +53.com +zionschool.info +livehindustan.com +nchsoftware.com +r7.com +upv.es +activecampaign.com +printful.com +instagr.am +duba.net +nsa.gov +japannetbank.co.jp +cert.org +prf.hn +as.me +lookbook.nu +yonhapnews.co.kr +pcgamesn.com +tmcnet.com +gouv.qc.ca +wccftech.com +livemaster.ru +idealista.com +youngjoygame.com +wpscdn.cn +plin.im +winscp.net +lgtvsdp.com +m-w.com +buenotraffic.com +simplilearn.com +shinezone.com +sandiegozoo.org +marketwire.com +reurl.cc +philstar.com +fmovies.to +netlify.app +uea.ac.uk +darkreading.com +gaiaonline.com +uniregistry.com +mcclatchydc.com +vindicosuite.com +flexjobs.com +zap2it.com +sekindo.com +fastpic.ru +corsair.com +bloomberglaw.com +univ-lorraine.fr +taiwannews.com.tw +zhiding.cn +ratemyprofessors.com +citysearch.com +yubico.com +aipai.com +buyersdrive.com +appen.com +sheypoor.com +kapwing.com +chronoengine.com +mca.gov.cn +artofmanliness.com +siamswim.com +interpol.int +cdnbye.com +cardiff.ac.uk +bfi.org.uk +qwant.com +trimble.com +kitapsec.com +costco.ca +uni-mainz.de +cdngslb.com +grainger.com +convertkit.com +finn.no +a9vg.com +finanzen.net +thisamericanlife.org +bab.la +ulule.com +shazam.com +adaa.org +he.net +netafraz.com +eloqua.com +policybazaar.com +gust.com +realme.com +saisoncard.co.jp +monotaro.com +freerepublic.com +lenovomm.com +onlineradiobox.com +meteofrance.com +paper.li +webmoney.ru +cbo.gov +nrc.nl +unicredit.it +lsosad.com +uv.es +arthritis.org +vklass.se +alarab.com +hiend.xyz +avid.com +dailypakistan.com.pk +sinica.edu.tw +llnl.gov +actblue.com +paisabazaar.com +spin.com +ipetitions.com +mktoresp.com +ku6.com +healio.com +armorgames.com +lismcanalys.fun +zacks.com +canalplus.com +justjared.com +olx.co.id +vz.ru +bidtheatre.com +amebaownd.com +filedropper.com +associates-amazon.com +classlink.com 
+tribuneindia.com +harborfreight.com +helpstart.co.kr +gfan.com +slackb.com +groupme.com +yotpo.com +dtic.mil +nieuwsblad.be +natlawreview.com +jobinja.ir +huffingtonpost.fr +scirp.org +inss.gov.br +googlegroups.com +pngwing.com +trendhunter.com +adda247.com +mafengwo.cn +bancobrasil.com.br +google.com.py +currys.co.uk +ruc.edu.cn +bea.gov +smadex.com +securelist.com +koreastardaily.com +uw.edu.pl +skyscrapercity.com +jssor.com +soumu.go.jp +sme.sk +affiliatelabz.com +nst.com.my +picturepush.com +chocolatey.org +bangordailynews.com +interarbiter.info +okko.tv +quikr.com +quickconnect.to +willhaben.at +su.se +deutschepost.de +joemonster.org +xiti.com +eatingwell.com +mdhv.io +clarivate.com +deakin.edu.au +loopnet.com +gsjyzg.com +ni.com +napster.com +medtronic.com +ruhr-uni-bochum.de +data.gov.uk +lehigh.edu +ganji.com +streamtape.com +forward.com +collegedunia.com +66law.cn +dolcegabbana.com +datsgirl.com +befunky.com +gostaresh.news +wsws.org +cadenaser.com +liquipedia.net +megogo.net +rootsweb.com +mail-order-bride.net +apple-dns.cn +joomshaper.com +sail-personalize.com +cainiao.com +kuwo.cn +shinystat.com +bookmyshow.com +hunan.gov.cn +internet-start.net +load20.biz +giga.de +aaas.org +cabanova.com +perfectmarket.com +mises.org +ath.cx +mext.go.jp +sumitomokenki.com.cn +dnacdn.net +numbeo.com +elibrary.ru +spokesman.com +rnz.co.nz +mailtrack.io +luckyforbet.com +javmodels.info +readwriteweb.com +unaids.org +mediabistro.com +skynewsarabia.com +4.cn +thepiratebay10.org +cedexis.net +nsfc.gov.cn +datingranking.net +rev.com +provsd.info +uzorak.info +99yechang.com +sitemeter.com +idianfa.com +hamburg.de +zmags.com +datenschutz-generator.de +moderus.info +yummly.com +fonts.net +cityam.com +mako.co.il +cathaypacific.com +resetera.com +maisonmargiela.com +knoxnews.com +abv.bg +qantas.com +retargetly.com +snip.ly +betgorebysson.club +lesanimaux.site +fb.ru +eyny.com +ic.ac.uk +gentoo.org +sentinelone.net +apsense.com +theoatmeal.com +niu.edu +redirectingat.com +faucetpay.io +sverigesradio.se +penguin.co.uk +v0cdn.net +quzke.com +pingan.com +dtu.dk +fujifilm.com +yandex.by +isabellacharms.xyz +meethue.com +larousse.fr +cleanpng.com +calgaryherald.com +fotostrana.ru +oboporn.com +scamadviser.com +mind.org.uk +desktopnexus.com +exirbroker.com +findlaw.cn +byted.org +aminoapps.com +optimum.net +oboteen.com +theiet.org +price.com.hk +iq-servers.com +osd.mil +ihsmarkit.com +sbi.co.in +ldoceonline.com +google.cm +universetoday.com +amarujala.com +tsn.ca +vietcombank.com.vn +game8.jp +phncdn.com +mps.gov.cn +gleb.website +hefteaz.info +cxense.com +saramin.co.kr +mpaa.org +reg.ru +w3layouts.com +united-domains.de +my.gov.au +ecfr.gov +newswise.com +lexcat.info +hkprice.info +hw.ac.uk +ubuntuforums.org +jotformeu.com +ebsco.com +matamata.com +avacharms.xyz +laprensagrafica.com +joomag.com +azertag.az +ccn.com +588ku.com +gq-magazine.co.uk +audiojungle.net +cmbc.com.cn +cairn.info +adtng.com +brainpop.com +iinet.net.au +analog.com +stackpathcdn.com +beckershospitalreview.com +easyasvpn.com +linguee.fr +financialcontent.com +finalfantasyxiv.com +wakelet.com +samanage.com +babbel.com +cafebazaar.ir +gammaplatform.com +mixi.jp +lowyat.net +diynetwork.com +autotrader.co.uk +liberty.edu +petco.com +topshop.com +feedspot.com +infusionsoft.app +elbaestes.pro +edocr.com +ctan.org +deref-web.de +iadb.org +u.to +osce.org +hubblesite.org +s-msn.com +oilprice.com +uni-goettingen.de +tapjoyads.com +ceneo.pl +viddler.com +csfd.cz +nbcphiladelphia.com +koolearn.com +liepin.com +dersyndikalist.info +anz.com.au 
+logmeininc.com +verywellfamily.com +ysl.com +uaf.edu +50megs.com +itsmyurls.com +blogspot.be +mg.co.za +movavi.com +galinka.info +google.com.bh +neurology.org +eso.org +writeablog.net +bangbros.com +inbox.lv +heraldtribune.com +arcor.de +dailythanthi.com +thairath.co.th +donanimhaber.com +gtbank.com +mapbar.com +wmich.edu +newspapers.com +builtwith.com +enterprise.com +cam4.com +tenpay.com +beginnersmind.info +upc.edu +fsc.org +binghamton.edu +opera.software +hoyendieta.info +cuisineandhealth.site +cafemom.com +energysector.website +basalam.com +hollywoodlife.com +mcall.com +snazzymaps.com +jiangsu.gov.cn +umkc.edu +chinanews.com.cn +english-heritage.org.uk +unipd.it +carmax.com +navdmp.com +toast.com +unimi.it +bsigroup.com +uta.edu +nbcdfw.com +google.cat +tcpdf.org +interestingengineering.com +freepeople.com +tvn24.pl +opensource.com +hpplay.cn +cyberlink.com +sennheiser.com +habitat.org +calculatorsoup.com +mydealz.de +duplichecker.com +marianacastromoreira.com +homeip.net +custom-roms.com +alabama.gov +imagevenue.com +google.lu +europe1.fr +mrpdata.net +nikkei.co.jp +youcanbook.me +lib.ru +slidesharecdn.com +amc.com +kingtime.jp +dwcdn.net +ruby-lang.org +mk.co.kr +globes.co.il +medallia.com +mythemeshop.com +soy502.com +biccamera.com +spaggiari.eu +google.com.lb +tucson.com +bochk.com +yangkeduo.com +aicpa.org +intoday.in +thedailyfunkclub.com +unicamp.br +dn.se +xvideos5.com +taxfoundation.org +yaml.org +xcar.com.cn +aiv-delivery.net +qhnmdb.com +clipwatching.com +thinkphp.cn +gatesnotes.com +gssprt.jp +mql5.com +techadvisor.co.uk +cachefly.net +abdn.ac.uk +afisha.ru +centos-webpanel.com +takepart.com +larepublica.pe +buy123.com.tw +123formbuilder.com +bewusstsein-events.info +aeaweb.org +mxtoolbox.com +powtoon.com +futura-sciences.com +defensenews.com +formsite.com +richmond.com +53kf.com +copyscape.com +ut.ac.ir +samsungelectronics.com +salary.com +hackerone.com +cityheaven.net +healthdata.org +government.ru +conviva.com +swiggy.com +almanac.com +hyundai.com +kickasstorrents.to +khn.org +banco.bradesco +indigo.ca +gongchang.com +liuxue86.com +impactradius-event.com +hani.co.kr +madrid.org +imgsite.net +cntd.ru +openclassrooms.com +elmogaz.com +cmail19.com +newyorkfed.org +travelchannel.com +bestbrides.org +pin.it +steamcontent.com +wakwak.com +brandwatch.com +adpopblocker.com +darpa.mil +lilly.com +mediav.com +vim.org +fotki.com +postmates.com +xdrig.com +vwo.com +actionnetwork.org +xozilla.com +dede58.com +btcbay.net +webroot.com +bihar.gov.in +elecfans.com +dazeddigital.com +aftenposten.no +sandai.net +google.com.kh +aucfan.com +11alive.com +txdot.gov +huxiu.com +zscaler.com +bungie.net +bkrtx.com +geoedge.be +perezhilton.com +dailysabah.com +royanews.tv +acx.com +google.com.et +louisiana.gov +ixl.com +ceskatelevize.cz +gamerant.com +cqhot.cn +interpark.com +flock.com +mouser.com +e-msedge.net +playboy.com +oberlo.com +pluto.tv +uni-hannover.de +gda.pl +yemeksepeti.com +catb.org +arga-mag.com +mprnews.org +idqqimg.com +1und1.de +ccdi.gov.cn +tripadvisor.ca +pro-football-reference.com +ncaa.org +bham.ac.uk +gocomics.com +tube8.com +zhongguowangshi.com +nwf.org +cookieconsent.com +tealiumiq.com +rstyle.me +martinfowler.com +familyhandyman.com +macworld.co.uk +grifo210.com +ufsc.br +tovima.gr +chartboost.com +pianmenw.com +gamewith.jp +tldp.org +easports.com +postheaven.net +orange.com +neu.edu +bpi.ir +myrecipes.com +librarything.com +wordwall.net +ableton.com +smi2.ru +amp.dev +ninemsn.com.au +jamfcloud.com +good.is +qy.net +redstate.com +yifysubtitles.org 
+samsungpositioning.com +clustrmaps.com +themehorse.com +csc.gov.in +drift.com +polyvore.com +unu.edu +rbth.com +sodapdf.com +thecrimson.com +prospect.org +thebestgame2020.com +delfi.lt +uniroma1.it +zanox.com +bootcss.com +brit.co +rose-brides.com +moppy.jp +fastweb.com +vox-cdn.com +paycomonline.net +empowher.com +ru.nl +usenix.org +ixbt.com +choosemyplate.gov +freepatentsonline.com +chuansong.me +ucas.com +wallpapercave.com +fgov.be +youracclaim.com +thedailystar.net +rtmark.net +microsofttranslator.com +bajajfinserv.in +businessdictionary.com +afi-b.com +people-press.org +byteimg.com +swagtraffcom.com +collective-buyer.com +chinaacc.com +freshmeat.net +winehq.org +mycima.video +tes.com +geoadnxs.com +kent.ac.uk +zenodo.org +acog.org +almasryalyoum.com +vitkac.com +ams.org +f6s.com +distcache.org +az.gov +yximgs.com +delfi.lv +steam-chat.com +foresee.com +wnycstudios.org +metal-archives.com +unbouncepages.com +ionicframework.com +anvato.net +etymonline.com +uow.edu.au +shangri-la.com +nirsoft.net +nextdirect.com +ck-ie.com +physorg.com +coca-cola.com +tinymce.com +ouo.press +tripadvisor.fr +peopledaily.com.cn +hetzner.com +csrc.gov.cn +mec.gov.br +datingmentor.org +leadpages.net +1001fonts.com +keywordtool.io +myswitzerland.com +clickz.com +jugantor.com +topgear.com +avocet.io +material.io +universityofcalifornia.edu +asics.com +mun.ca +baseball-reference.com +rotoworld.com +grabtaxi.com +bnnbloomberg.ca +tribpub.com +forter.com +gifshow.com +spotx.tv +opensocietyfoundations.org +bancsabadell.com +hola.com +life.ru +backlog.jp +tiffany.com +yuanzhanapp.com +uncc.edu +panasonic.jp +e-derslik.edu.az +brides.com +cootlogix.com +keepass.info +thenewslens.com +zaobao.com +marketsandmarkets.com +bcove.me +sky.it +ladepeche.fr +perfil.com +afterpay.com +rungrinh.vn +icij.org +jreast.co.jp +vingle.net +ucd.ie +teamusa.org +sncf.com +connatix.com +whowhatwear.com +voc.com.cn +businessoffashion.com +blackrock.com +google.co.ke +calibre-ebook.com +fisglobal.com +2o7.net +vokrug.tv +okp.com +flavors.me +plista.com +banglanews24.com +tradeindia.com +heapanalytics.com +freemusicarchive.org +jang.com.pk +pngegg.com +colossusssp.com +adhaven.com +ipsnews.net +qnap.com +moddb.com +insurancejournal.com +wfu.edu +247wallst.com +arabseed.cam +comingsoon.net +g2afse.com +jkanime.net +runescape.com +earthsky.org +wp-royal.com +sherwin-williams.com +wordcounter.net +hainan.gov.cn +todoist.com +paxful.com +score.org +selfgrowth.com +marketingprofs.com +umb.edu +ui.ac.id +unglobalcompact.org +newadvent.org +iit.edu +ninthdecimal.com +gitbook.io +wjla.com +ohio.edu +khou.com +crowdrise.com +anthem.com +cheshi.com +alipaydns.com +saglik.gov.tr +theadvocate.com +middlebury.edu +zoznam.sk +rawgit.com +vip.com +niemanlab.org +notebookcheck.net +ravelry.com +rcn.com +copart.com +google.com.ni +no-ip.com +sante.fr +activestate.com +ozerov.de +processon.com +u17.com +bluejeans.com +thenationalnews.com +enorth.com.cn +ncaa.com +citigroup.com +win-rar.com +toptenreviews.com +savethechildren.org +teamspeak.com +sammobile.com +muni.cz +marketwired.com +hotukdeals.com +womansday.com +adultswim.com +buff.ly +denofgeek.com +wimp.com +confex.com +und.edu +pcpop.com +thenewstribune.com +wiwo.de +home.cern +nongnu.org +youneedabudget.com +dayoo.com +myheritage.com +hsn.com +kxcdn.com +mhtwyat.com +ufrgs.br +duapps.com +bastillepost.com +toysrus.com +goop.com +headlines.pw +google.mu +17173.com +wenthemes.com +airtm.com +opm.gov +sentry-cdn.com +moneycrashers.com +gapyear.com +llbean.com +google.com.om +property24.com 
+imagetwist.com +tv.com +kingcounty.gov +bunshun.jp +pravo.gov.ru +cnhubei.com +sony.jp +carsales.com.au +colourpop.com +uwyo.edu +msdmanuals.com +kentucky.com +michelejullian.info +digitalguardian.com +efe.com +pozdravok.ru +ubergizmo.com +sasac.gov.cn +theporndude.com +majhinaukri.in +moodys.com +fitgirl-repacks.site +cnfol.com +cloudflare-dns.com +wpi.edu +royalsociety.org +computerhistory.org +hdslb.com +buymeacoffee.com +sachsen.de +samsungiotcloud.com +coschedule.com +pinterest.it +narrative.io +skladchik.com +www.ne.jp +paperwritings.com +lifehacker.ru +kantei.go.jp +bitfinex.com +shopee.com +realmadrid.com +marketscreener.com +myschoolapp.com +racked.com +dunyanews.tv +angelbroking.com +jcb.co.jp +sho.com +hktdc.com +dochub.com +threatpost.com +rtl.fr +6abc.com +meredith.com +divx.com +sbis.ru +openbsd.org +london.gov.uk +worldofwarships.eu +stjude.org +ge.tt +breakflip.com +thestate.com +palgrave.com +opensubtitles.org +bulbagarden.net +artic.edu +vector.co.jp +coach.com +gadgetsnow.com +aao.org +guim.co.uk +tn.edu.tw +theinformation.com +caixin.com +residentadvisor.net +droom.in +womenshealth.gov +bravenet.com +spbu.ru +duga.jp +50webs.com +carousell.sg +bitflyer.com +geogebra.org +griffith.edu.au +destatis.de +pravda.com.ua +regjeringen.no +internet.com +createsend1.com +uct.ac.za +ulifestyle.com.hk +312168.com +continental.com +gohugo.io +statuspage.io +ifvod.tv +16mb.com +biteable.com +buzznet.com +dwz.cn +harpers.org +zalando.de +npc.gov.cn +airbnb.ca +lesoir.be +sohatv.vn +manualslib.com +rada.gov.ua +umaryland.edu +uscg.mil +6789.com +reallifecam.com +news247.gr +qafqazinfo.az +himado.in +lboro.ac.uk +businesstoday.in +citibank.co.in +radissonblu.com +creditmutuel.fr +cookiebot.com +vfsglobal.com +amemv.com +onepeloton.com +nationwide.com +thisoldhouse.com +pornpics.com +khabarpu.com +mobypicture.com +hathitrust.org +traveloka.com +nestle.com +crypto.com +google.ge +vevo.com +refersion.com +wgntv.com +btcliving.com +seznamzpravy.cz +icptrack.com +w3techs.com +jneurosci.org +tulsaworld.com +vpnmentor.com +datacamp.com +rijksmuseum.nl +logitechg.com +digitalcommerce360.com +cambridgeenglish.org +codedexchange.com +americanthinker.com +ofcom.org.uk +buddypress.org +rotary.org +nova.edu +espncdn.com +pe.com +safer-networking.org +mbalib.com +aaos.org +biblestudytools.com +realtime-bid.com +fontanka.ru +powerofpositivity.com +lolipop.jp +ethnos.gr +justanswer.com +x2convert.com +kunlunsl.com +couponfollow.com +jagodangdut.com +googlehosted.com +jobkorea.co.kr +brainly.com.br +jianzhi8.com +mailjet.com +myperfect2give.com +abs.gov.au +asahi-net.or.jp +princetonreview.com +afthemes.com +everestads.net +okdiario.com +ipko.pl +pons.com +webgarden.cz +110mb.com +domaining.com +dahe.cn +offcn.com +secnews.gr +sciam.com +chinamobile.com +tizianafausti.com +boltdns.net +beliefnet.com +nbcsandiego.com +ucoz.net +flannels.com +dmcdn.net +listverse.com +jdsupra.com +airtelxstream.in +dccomics.com +livechat.com +env.go.jp +cnn.io +consequenceofsound.net +simplyrecipes.com +cjn.cn +hessen.de +jutarnji.hr +amcdn.vn +gitbook.com +carleton.edu +latribune.fr +lexilogos.com +telegram.com +riafan.ru +bunnings.com.au +f95zone.to +uptimerobot.com +fco.gov.uk +semana.com +xzjdjx.com +uqam.ca +pc6.com +online-audio-converter.com +psychiatry.org +udmserve.net +diandongwajueji.com +ulb.ac.be +a.co +dtscdn.com +linternaute.fr +openid.net +wpu.sh +yt1s.com +bgsu.edu +google.com.pr +publix.com +wikidata.org +distrokid.com +rpp.pe +bobvila.com +thesmokinggun.com +ansi.org +delfi.ee +postaffiliatepro.com 
+apc.com +conferdeploy.net +vh1.com +exxonmobil.com +biggerpockets.com +yogajournal.com +regions.com +atmarkit.co.jp +click2houston.com +unwto.org +pornone.com +blacklivesmatter.com +filmweb.pl +abercrombie.com +pcisecuritystandards.org +muji.com +franceinter.fr +hdsex.org +ticksy.com +hasil.gov.my +lfstmedia.com +weblate.org +deccanchronicle.com +majestic.com +thinkupthemes.com +pglstatp-toutiao.com +clickondetroit.com +120ask.com +amara.org +presseportal.de +gizmodo.com.au +ellitoral.com +capital.gr +unipi.it +yes24.com +zergnet.com +nex8.net +ua.es +contineljs.com +yourtango.com +2mnd56.com +privacypolicygenerator.info +jmu.edu +marktplaats.nl +bytedance.net +musescore.com +zoosk.com +haqqin.az +clan.su +fcbarcelona.com +mtime.com +businesscatalyst.com +ipt.pw +soccerway.com +ondemand.com +up.nic.in +verizon.net +dynatrace.com +egov.kz +fanduel.com +santander.com.br +storenvy.com +rbi.org.in +terkini.id +wuxiaworld.com +discord.media +wfaa.com +yam.com +pp.ru +gov.kr +yourdailysportfix.com +bama.ir +google.hn +diepresse.com +fullerton.edu +brightlocal.com +omnicalculator.com +easygetinsta.com +alanba.com.kw +googleapis.cn +jaxa.jp +wefinex.net +abc15.com +taipeitimes.com +lycos.fr +cineca.it +miaminewtimes.com +ipsosinteractive.com +id.me +google.mn +edelman.com +uni-bremen.de +osaka-u.ac.jp +investorplace.com +aihelp.net +a3cloud.net +gmpg.org +google.cd +whereby.com +loreal.com +okala.com +21food.cn +kdvr.com +samsungrs.com +wwnorton.com +instamojo.com +dropboxstatic.com +shiftdelete.net +walkerland.com.tw +ns1p.net +flvto.biz +bancogalicia.com.ar +saat24.news +dlr.de +next.co.uk +basspro.com +cuntempire.com +zcubes.com +azureedge.us +sportradarserving.com +betway.com +ca.com +google.ci +qhdsny.com +ms.gov +ilna.news +internationalwomensday.com +joelonsoftware.com +jacksonville.com +iab.com +adultfriendfinder.com +newswire.com +ssjzw.com +metlife.com +todo1.com +wondershare.net +b92.net +c-msedge.net +transifex.com +escort-advisor.com +openvpn.net +dns-shop.ru +solarwinds.com +twitcasting.tv +escholarship.org +winnipegfreepress.com +cnnturk.com +simon.com +watanserb.com +servimg.com +brew.sh +booklikes.com +ca168.com +themefreesia.com +smarttradecoin.com +x-mol.com +utarget.ru +iq.com +brisbanetimes.com.au +shaadi.com +kdocs.cn +unilever.com +nerdist.com +10086.cn +firefox.com.cn +wowslider.com +kampyle.com +it1352.com +jacobinmag.com +state.nj.us +memrise.com +iod2.cn +umaine.edu +qhyhgf.com +lwn.net +699pic.com +globalpost.com +getepic.com +mp3-youtube.download +samba.org +bjnews.com.cn +imo.org +dbs.com +mxhichina.com +mediapart.fr +socialbakers.com +mashreghnews.ir +sulekha.com +nebraska.gov +eldorado.ru +getfirefox.com +treas.gov +google.li +videohive.net +nexage.com +inosmi.ru +bget.ru +sainsburys.co.uk +upornia.com +scoop.co.nz +al3omk.com +zencdn.net +wetpaint.com +viifan.com +spectrumlocalnews.com +subhd.com +solidworks.com +csdiran.ir +arukikata.co.jp +1drv.com +adthrive.com +vonage.net +igg.me +joebiden.com +zynga.com +rospotrebnadzor.ru +weeklystandard.com +fzg360.com +simplisafe.com +babble.com +share-videos.se +rainn.org +veeam.com +gurufocus.com +motherless.com +blockchain.info +miamioh.edu +itar-tass.com +pec.it +ca800.com +screener.in +sd.gov +strongest.cn +nflbite.com +playvalorant.com +containerstore.com +hearthis.at +excelsior.com.mx +tagesanzeiger.ch +leidenuniv.nl +kicker.de +powerthesaurus.org +cudasvc.com +nimo.tv +valentino.com +woshipm.com +google.am +cyberciti.biz +mrtnsvr.com +thinkquest.org +oprahmag.com +webaim.org +ucdenver.edu +liverpool.ac.uk 
+macrotrends.net +vudu.com +pcloud.com +10fastfingers.com +animenewsnetwork.com +athome.co.jp +iis.net +nascar.com +carrd.co +user-shield.com +nea.org +royalgazette.com +bravehost.com +coding.net +redcrossblood.org +amnestyusa.org +christianlouboutin.com +washingtonian.com +hs.fi +geo.tv +center4family.com +batmanapollo.ru +statisticbrain.com +cervantes.es +amcs-tachyon.com +tubesafari.com +symbaloo.com +clutch.co +mta.info +jianzhiba.net +jocial.com +prv.pl +3dn.ru +playfabapi.com +rolex.com +amazon.sg +icao.int +allposters.com +siteimproveanalytics.io +bendibao.com +educity.cn +diva-portal.org +azurefd.us +dwell.com +whattoexpect.com +dlive.tv +opb.org +siteswithcontent.com +analdin.com +roche.com +famitsu.com +housing.com +soha.vn +maxmind.com +google.com.af +tongji.edu.cn +ipinfo.io +lds.org +google.is +atw.hu +inoreader.com +budsgunshop.com +sketch.com +goodreturns.in +porntrex.com +bmwi.de +cafis-paynet.jp +chowhound.com +banesco.com +akahost.net +agpd.es +sensortower.com +grc.com +loex.io +jdpower.com +empireonline.com +sdu.edu.cn +uni-trier.de +grandviewresearch.com +wealthsimple.com +thisiscolossal.com +netacad.com +thelocal.de +4anime.to +jyb.cn +prz.edu.pl +kbstar.com +mdanderson.org +yaklass.ru +cnpq.br +rus.ec +seoccc.com +pornhat.com +companieshouse.gov.uk +bumble.com +fonts.com +almaghreb24.com +format.com +austinchronicle.com +hikvision.com +ruvr.ru +douyinvod.com +euobserver.com +img-taboola.com +kmail-lists.com +thepresident.gr +telesurtv.net +blossomthemes.com +lrb.co.uk +pearsoncmg.com +strath.ac.uk +nwps.ws +postman.com +fourmilab.ch +commsec.com.au +jamaran.news +advocate.com +href.li +softwareadvice.com +petsmart.com +radiko.jp +kickassanime.rs +monex.co.jp +societegenerale.fr +index.hu +hzmklvdieo.com +laobiao.com +opengroup.org +cbsistatic.com +zum.com +blogactiv.eu +blondieshop.com +nationalgallery.org.uk +twitlonger.com +coinpot.co +clipconverter.cc +amazon.com.tr +clicrbs.com.br +yandex.ua +guru.com +worldsecuresystems.com +flywheelsites.com +nextcloud.com +skycn.com +juniper.net +lindaikejisblog.com +a1sewcraft.com +portableapps.com +walmart.com.mx +hypermusk.com +uisdc.com +typosthes.gr +tubitv.com +fxstreet.com +ebc.com.br +skift.com +daad.de +niniban.com +pipedrive.com +getpaint.net +slack-redir.net +jiji.ng +falabella.com +baixing.com +webteb.com +exness.com +todayonline.com +fabric.com +groupspaces.com +privacypolicyonline.com +franchemduty.work +gitv.tv +wallstreetforum.net +barackobama.com +stihi.ru +optinmonster.com +funimation.com +thimpress.com +google.com.mt +uni-due.de +saatchiart.com +geocities.co.jp +trendarbitrage.com +deployads.com +flipgrid.com +restream.io +gta5-mods.com +suse.com +superpages.com +guardian.ng +metafilter.com +local.com +sandisk.com +google.ht +authy.com +thenationonlineng.net +uab.cat +linestep.net +blueyonder.co.uk +yourbrideglobal.com +usfca.edu +vidyard.com +cdn77.org +qub.ac.uk +morgenpost.de +bih.nic.in +split.io +indiatvnews.com +blogspot.mx +city.ac.uk +theqoo.net +intercontinental.com +more.tv +thewire.in +b-msedge.net +angular.io +3dcartstores.com +pornsos.com +picsart.com +aph.gov.au +sbipg.sbi +nearme.com.cn +sae.org +lieferando.de +shopclues.com +algoritmika.az +democratandchronicle.com +samedayessay.com +bizport.cn +yinxiang.com +bancochile.cl +yankodesign.com +hapitas.jp +epo.org +nitroflare.com +freebeacon.com +webpagetest.org +livecareer.com +yatra.com +youme.im +tvbs.com.tw +pizzahut.com +blogspot.ch +iberia.com +iconosquare.com +ttlink.com +governmentjobs.com +elte.hu +mediacdn.vn +panerabread.com +asda.com 
+hotnewhiphop.com +moa.gov.cn +sse.com.cn +google.as +ebaydesc.com +henan.gov.cn +bravotube.net +58pic.com +jobcan.jp +liverpoolfc.com +nick.com +network-auth.com +usccb.org +unina.it +alisoft.com +ilgiornale.it +tn.gov.in +trekbikes.com +aamc.org +woolworths.com.au +adsco.re +krone.at +publico.es +bbva.com.ar +mojang.com +clegc-gckey.gc.ca +4movierulz.lv +ozbargain.com.au +300.cn +odin.com +chicago.gov +avature.net +bet.com +easywp.com +culture.ru +zoho.in +therealdeal.com +theonlygames.com +metacpan.org +ens.fr +metapress.com +cssmoban.com +zapps.vn +fajar.co.id +staradvertiser.com +autozone.com +uni-wuerzburg.de +knoji.com +mootools.net +mobalytics.gg +peopleperhour.com +uberant.com +qeqeqe.com +qhnky.com +kinokong.org +2checkout.com +tp.edu.tw +fender.com +smith.edu +up.gov.in +anyflip.com +blogspot.gr +pdfescape.com +a16z.com +thrivecart.com +alc.co.jp +iihs.org +cpta.com.cn +labiennale.org +panet.co.il +1gb.ru +agu.org +ebaystatic.com +bitauto.com +turbosquid.com +home.neustar +collegeboard.com +txstate.edu +mondiad.net +cac.gov.cn +ielts.org +binged.it +seths.blog +slideplayer.com +donorbox.org +aon.com +newspicks.com +moveon.org +polyu.edu.hk +tympanus.net +zhangyu.tv +lacity.org +rebrandly.com +gfx.ms +qstheory.cn +bbva.mx +learningapps.org +decider.com +myrussianbride.net +google.la +fosshub.com +find-a-bride.net +klaviyo.com +nvsp.in +unirioja.es +pilotonline.com +userapi.com +dressinn.com +vonage.com +morguefile.com +carecredit.com +adop.cc +app.com +gem.gov.in +zhuwang.cc +comparitech.com +standaard.be +affirm.com +davivienda.com +crello.com +sudouest.fr +travelerdoor.com +destructoid.com +amadeus.com +megaphone.fm +xhamsterpremium.com +nursery.com.pk +personalbadcreditloans.net +thestranger.com +bytefence.com +aternos.org +google.ad +book118.com +qiwi.com +mandrillapp.com +videopress.com +cbr.ru +firstcry.com +vodafone.com +igodigital.com +m1finance.com +ucpress.edu +braintreegateway.com +adventori.com +check24.de +ultraviewer.net +placed.com +theprint.in +pojoksatu.id +jewishvirtuallibrary.org +standardchartered.com +google.com.pa +creditcards.com +hellofresh.com +wolterskluwer.com +internetsociety.org +bigmir.net +tripadvisor.es +careers360.com +google.co.mz +google.com.gh +linuxjournal.com +sqworl.com +clickorlando.com +la-croix.com +nanowrimo.org +vzw.com +carbonblack.io +israelnationalnews.com +beeline.ru +elledecor.com +deere.com +key.com +babylon.com +xmu.edu.cn +runsignup.com +joinhandshake.com +eztv.re +ntnu.edu.tw +apkmirror.com +google.co.bw +perm.ru +pandora.net +goindigo.in +tdatamaster.com +cex.io +cubadebate.cu +elvenar.com +beget.tech +hipwee.com +ksu.edu +jbhifi.com.au +shandong.gov.cn +ffmpeg.org +haiwainet.cn +urssaf.fr +grafthivecrocus.cam +problogger.com +google.com.cy +telangana.gov.in +uwindsor.ca +openrice.com +jcp.org +francebleu.fr +scotusblog.com +zoopla.co.uk +globalcitizen.org +wpcomstaging.com +ekstrabladet.dk +videoamp.com +marketo.net +stylecaster.com +eventbrite.com.au +homeaway.com +noor-book.com +pbebank.com +photoshop.com +adsmoloco.com +freshworks.com +salute.gov.it +stocksnap.io +hunter.io +tv2.no +sundance.org +ucc.ie +geni.us +elkhabar.com +en-japan.com +life360.com +jetro.go.jp +chuandong.com +hyperallergic.com +brightmountainmedia.com +newsbomb.gr +bmwgroup.com +webdesignerdepot.com +researchandmarkets.com +sumibuy.com +google.co.ug +plarium.com +greenbiz.com +lianlianpay.com +skimlinks.com +uptvs.com +openweathermap.org +futureplc.com +fee.org +jekyllrb.com +edf.org +blinklist.com +reliancedigital.in +sacred-texts.com +jomodns.com 
+y-medialink.com +gsma.com +xspdf.com +godaddysites.com +oliveogrill.com +praca.gov.pl +auone.jp +angularjs.org +imgbox.com +compressjpeg.com +capitaloneshopping.com +aramex.com +techwalla.com +acmethemes.com +shop.app +growingio.com +lankasri.com +bancobai.ao +google.al +qr-code-generator.com +citizensadvice.org.uk +labcorp.com +xinnet.com +seroundtable.com +opswat.com +kennedy-center.org +mediatemple.net +sicredi.com.br +elgenero.com +wesleyan.edu +kiro7.com +autocar.co.uk +awardspace.com +datingreviewer.net +middleeasteye.net +dundee.ac.uk +west.cn +hs-sites.com +lbpicmt.com +guitarcenter.com +castbox.fm +iwm.org.uk +opera-api.com +naftemporiki.gr +environment.gov.au +filefactory.com +charter.net +jobsdb.com +piwik.org +doctissimo.fr +le.com +knet.cn +webcrawler.com +elbilad.net +umm.edu +anonymize.com +uca.fr +1717pk.com +dcfever.com +rawpixel.com +yourdomain.com +ulg.ac.be +bootstrapmade.com +turnkeylinux.org +translatewiki.net +file-upload.com +bnmla.com +nxtbook.com +ok.xxx +ludashi.com +e-gov.az +preply.com +on.cc +prosieben.de +google.bs +studopedia.ru +cfainstitute.org +westword.com +multitran.com +trip.com +mbank.pl +uni-kiel.de +doingbusiness.org +hdzog.com +joann.com +591.com.tw +ultimatix.net +mentalhealth.org.uk +sucursalelectronica.com +dti.ne.jp +travis-ci.org +dea.gov +cs-cart.com +rsna.org +oann.com +asha.org +wko.at +mmo-champion.com +akwam.co +shine.com +cqnews.net +cnki.com.cn +hatenablog.jp +hypotheses.org +papernow.org +phoenix.edu +cnipa.gov.cn +gimy.co +chefkoch.de +ufc.com +gumtree.co.za +ki.se +ekantipur.com +metinfo.cn +vk.me +lordfilm.so +westlaw.com +ndl.go.jp +oxfordlearnersdictionaries.com +japan-guide.com +tstatic.net +nationaljournal.com +surfline.com +fr.de +namequery.com +probuilds.net +richmond.edu +ihs.com +caijing.com.cn +xiachufang.com +accesstrade.net +passkey.com +hardrock.com +icook.tw +serif.com +artfire.com +google.sn +eyereturn.com +atimes.com +register.com +bmw.com +google.mg +informit.com +marquette.edu +netund.com +gamepass.com +lush.com +jigsawplanet.com +wayfair.ca +teamtreehouse.com +ksapisrv.com +aktuality.sk +muenchen.de +franceculture.fr +alibabacloud.com +sciencemuseum.org.uk +adpone.com +tmweb.ru +truepush.com +admixer.net +cqvip.com +perimeterx.net +360buyimg.com +uibk.ac.at +odn.ne.jp +slu.edu +steelseries.com +native-instruments.com +couriermail.com.au +santander.cl +transfermarkt.de +yunexpress.com +jobui.com +51ade.com +leanplum.com +sefon.pro +dartsearch.net +dailyhunt.in +espacenet.com +wellandgood.com +cbpp.org +1001freefonts.com +abqjournal.com +celtra.com +thefreelibrary.com +egov-nsdl.com +fy169.net +alturl.com +gomlab.com +echosign.com +livenation.com +theplatform.com +datingstudio.com +teamwork.com +loccitane.com +google.com.jm +brunch.co.kr +vimeopro.com +termly.io +assemblee-nationale.fr +ricardo.ch +aol.co.uk +sportsmansoutdoorsuperstore.com +zakupki.gov.ru +gg.gg +qatarliving.com +china360.cn +alquds.co.uk +tigerdirect.com +seeking.com +thehotline.org +telus.com +genome.gov +melia.com +trafficjunky.net +sdo.com +limetorrents.info +soufun.com +comedycentral.com +widespace.com +cgtrader.com +stackpathdns.com +toledoblade.com +yn.gov.cn +hsbc.co.uk +hubapi.com +zurb.com +quicinc.com +jabra.com +instapage.com +wenxuecity.com +tvrain.ru +einnews.com +heytapimage.com +stereogum.com +moj.go.jp +steepto.com +kariyer.net +dkb.de +myfolio.com +immi.gov.au +cloudscar.com +gongkong.com +nianticlabs.com +pikbest.com +discourse.org +crsky.com +acehardware.com +rhs.org.uk +justin.tv +lagou.com +essence.com +chinacourt.org 
+pclady.com.cn +wdl.org +kw.com +google.bi +betweendigital.com +toronto.edu +labnol.org +synology.me +syri.net +cpj.org +yieldlab.net +hclips.com +eero.com +chimpstatic.com +marketingweek.com +online2pdf.com +meitu.com +agilent.com +time.is +proximabeta.com +usertesting.com +mandarinoriental.com +maff.go.jp +mskcc.org +tinypass.com +allbusiness.com +williamhill.com +essex.ac.uk +tass.com +comenity.net +verydesigner.cn +groww.in +elo7.com.br +blic.rs +uoa.gr +pushmart.net +hugoboss.com +ncore.cc +manhuagui.com +internic.net +arsenal.com +fashionnova.com +mondaq.com +pge.com +greentechmedia.com +questdiagnostics.com +ncjrs.gov +zblogcn.com +themeansar.com +ngs.ru +kansas.com +brandpa.com +insee.fr +gizmochina.com +answerthepublic.com +yandex.com.tr +sbs.co.kr +megafon.ru +vg247.com +grailed.com +everquote.com +sc.gov.cn +defra.gov.uk +exhentai.org +thewindowsclub.com +getflywheel.com +dynadot.com +zuimeitianqi.com +backcountry.com +advfn.com +bucknell.edu +entekhab.ir +kgw.com +jomashop.com +khan.co.kr +pdf2doc.com +poly.com +monografias.com +tekcities.com +fastcoexist.com +smrtb.com +discoveryeducation.com +helpscout.com +athenahealth.com +paper-helper.org +wenjuan.com +google.md +adsoftheworld.com +webengage.com +telmex.com +tv5monde.com +jp.net +izvestia.ru +google.mk +n4g.com +json.org +lancaster.ac.uk +mihanwebhost.com +uhaul.com +starfall.com +mathway.com +proprofs.com +nationalmssociety.org +financesonline.com +smotrim.ru +journalism.org +hometalk.com +owncloud.com +appsumo.com +sxc.hu +modernhealthcare.com +voxeu.org +pornolab.net +w7000.com +zaycev.net +habrahabr.ru +spot.im +lavozdegalicia.es +feng.com +ametsoc.org +google.com.bn +osichem001.com +goo-net.com +governing.com +fox8.com +getdrip.com +coltortiboutique.com +indusind.com +cdek.ru +lamoda.ru +cbd.int +shalltry.com +astrazeneca.com +intesasanpaolo.com +gameanalytics.com +babyshop.com +jenkins.io +unibas.ch +stylemixthemes.com +iponweb.net +wgbh.org +money.pl +tpsl-india.in +mypearson.com +maven.org +yjtag.jp +islcollective.com +metrolyrics.com +webkit.org +jd.hk +hmrc.gov.uk +dailystrength.org +ajio.com +newchic.com +playbill.com +haoyangmao8.com +handbrake.fr +ohsu.edu +siftscience.com +omnihotels.com +uline.com +zenhabits.net +fc.lc +educacao.mg.gov.br +name.com +squadhelp.com +google.com.na +reebok.com +oray.com +lectortmo.com +owncloud.org +alison.com +xenforo.com +gamespy.com +doi.gov +boisestate.edu +google.ps +ascii.jp +ponisha.ir +your-server.de +desjardins.com +iltalehti.fi +blogblog.com +tvline.com +gcs-web.com +freakonomics.com +aktualne.cz +maidi.me +wallpaperaccess.com +videvo.net +massgeneral.org +codinghorror.com +cebbank.com +jumpcloud.com +youcaring.com +nexac.com +wpzoom.com +dsw.com +mforos.com +gcloudsdk.com +blogcms.jp +swiftserve.com +alignable.com +backblaze.com +aso1.net +gametrailers.com +clicktale.net +tripadvisor.it +gunbroker.com +thebodyshop.com +ovid.com +besthookupwebsites.net +ylsw.com +skyroom.online +tu-darmstadt.de +zonealarm.com +ijie.com +research.net +unil.ch +nikonusa.com +hobbs.com +customink.com +takungpao.com +eleconomista.com.mx +seneweb.com +businesstimes.com.sg +ugm.ac.id +flyertea.com +4dex.io +nctu.edu.tw +e1.ru +moncler.com +gamingbible.co.uk +essaywriting.org +laughingsquid.com +enotes.com +netapp.com +uba.ar +sytes.net +gansu.gov.cn +oakley.com +barcelona.cat +olemiss.edu +publons.com +flexmls.com +clickmeeting.com +aphapublications.org +onelink.to +sakura.ad.jp +local10.com +udayton.edu +svd.se +avastbrowser.com +australia.com +shopifycloud.com +winshang.com +amazon.nl 
+xxinn887.com +lci.fr +kaist.ac.kr +90tiyu.com +chem17.com +thedailymeal.com +disney.co.jp +wizzair.com +sam.gov +jobrapido.com +mediawallahscript.com +journaldesfemmes.fr +aeroflot.ru +tb.cn +ufmg.br +museodelprado.es +bathandbodyworks.com +utsa.edu +gomhuriaonline.com +library.lol +unibet.com +warbyparker.com +dogpile.com +dailysignal.com +daytondailynews.com +qiku.com +car.gr +ycwb.com +uni-lj.si +stamped.io +bmwusa.com +toshiba.co.jp +malaymail.com +italki.com +trend.az +tirexo.io +art.com +ant.design +typing.com +google.com.ai +blogspot.pt +wickedlocal.com +gov.on.ca +juicyads.com +meeshosupply.com +1stdibs.com +rts.ch +technion.ac.il +ah.gov.cn +adbro.me +everestjs.net +sou300.com +gelocal.it +ferret-plus.com +cpubenchmark.net +titlemax.us +linkhaitao.com +blogspot.com.ar +carvana.com +ng.ru +teepublic.com +digital-photography-school.com +joox.com +yourwownews.com +google.com.ag +7news.com.au +razerzone.com +baixaki.com.br +kcrw.com +sogoucdn.com +bybit.com +eurosport.com +liv.ac.uk +720pizle.org +niedersachsen.de +hawaiinewsnow.com +mercadolivre.com +zdusercontent.com +pillpack.com +woman.ru +iii.org +lifesitenews.com +latam.com +fashionista.com +byrenjia.com +walkscore.com +finder.com +sgi.com +papajohns.com +firebaseapp.com +delawareonline.com +ufreegames.com +gingersoftware.com +bitmex.com +journalstar.com +moj.gov.cn +shaanxi.gov.cn +webgarden.com +myfxbook.com +ptc.com +wpmudev.org +oceanwp.org +leiphone.com +uncommongoods.com +ecnu.edu.cn +hiworks.com +dapenti.com +rense.com +127.net +katu.com +besoccer.com +hinative.com +pitchbook.com +waterstones.com +monsterindia.com +iauec.ac.ir +pushapi.online +dcloud.net.cn +farnell.com +naacp.org +yayoi-kk.co.jp +qualaroo.com +chabad.org +blog.gov.uk +latrobe.edu.au +nysed.gov +liquidweb.com +google.co.uz +kurier.at +jidapharm.com +mywebsite-editor.com +benchmarkemail.com +ariba.com +al-monitor.com +webd.pl +oa.com +splunk.com +alphacoders.com +fec.gov +huaxingchem.com +sqwyw.org +uc.pt +retaildive.com +washingtonmonthly.com +elearningindustry.com +artisteer.com +hookupwebsites.org +apptimize.com +kxan.com +nnov.ru +doramasmp4.com +realmailorderbrides.com +adtilt.com +guru3d.com +art19.com +noip.com +admedo.com +techbang.com +wetv.vip +btinternet.com +karnataka.gov.in +korrespondent.net +postfun.com +bluewin.ch +shangxueba.com +mailorderbrides.us +applvn.com +docstoc.com +jarir.com +scpr.org +accountkit.com +sharedid.org +duowan.com +elifesciences.org +xnxx-cdn.com +3dnews.ru +submittable.com +kempinski.com +hangzhou.com.cn +vbulletin.com +jining.com +snhu.edu +rsa.com +phillymag.com +getpostman.com +gfk.com +daraz.com.bd +converse.com +1sept.ru +cryptobrowser.site +23hq.com +squareblogs.net +oui.sncf +voz.vn +gmx.com +publico.pt +flexera.com +115.com +americanbanker.com +google.tt +theme.co +pulitzer.org +coolmathgames.com +8684.cn +geoguessr.com +google.co.zm +thegospelcoalition.org +mt.co.kr +dergipark.org.tr +sitey.me +chrome.com +americanheart.org +xbytessolucoes.com +gradeup.co +tv9kannada.com +telegraphindia.com +javhd.com +ghacks.net +essay-company.com +google.bf +radiofarda.com +perfectgirls.net +zapmeta.ws +woobox.com +vans.com +pressdemocrat.com +auto.ru +boxcdn.net +airbnb.co.uk +google.fm +mellowads.com +shopifycdn.com +doctorswithoutborders.org +m-team.cc +nissanusa.com +newwife.net +leju.com +careeronestop.org +photon.com +decipherinc.com +bbva.es +cointiply.com +gold678.com +alahlionline.com +streameast.live +visualwebsiteoptimizer.com +iwanttodeliver.com +tilda.cc +hotrussianwomen.net +itproportal.com +bigstockphoto.com 
+yougov.co.uk +revolut.com +jamesclear.com +careerpower.in +thewaltdisneycompany.com +otago.ac.nz +highcharts.com +webestools.com +grooveshark.com +themarysue.com +montiboutique.com +mcafeewebadvisor.com +z.com +visitlondon.com +londonstockexchange.com +google.tm +kazeo.com +orst.edu +thrivethemes.com +jstv.com +350.org +mturk.com +majorgeeks.com +xunta.gal +fgv.br +russianfood.com +1c.ru +uberinternal.com +overcast.fm +9ku.com +digitec.ch +goskope.com +ap7am.com +radaronline.com +phone.com +socialnewpages.com +southmoney.com +1c-bitrix.ru +weizmann.ac.il +google.cg +creativelive.com +rescuetime.com +tv2.dk +shaolianhu.com +sportsmole.co.uk +cdnico.net +hero-wars.com +eljur.ru +uni-erlangen.de +barclaycardus.com +uimaker.com +summerhamster.com +chng.it +sanwen8.cn +ename.net +cash-central.net +doostihaa.com +plus.com +sigmaaldrich.com +etoland.co.kr +fossbytes.com +runative-syndicate.com +fstoppers.com +bna.com +google.co.ck +level3.com +google.dm +to10.gr +leam.com +bjs.gov +pingdom.net +privy.com +realtor.org +googlesource.com +byrdie.com +ignou.ac.in +chapman.edu +klook.com +rtings.com +intercom.help +mycdn.me +robbreport.com +xmsecu.net +info.com +usal.es +emerse.com +upmusics.com +bookshop.org +earthcam.com +weddingbee.com +the-sun.com +starfieldtech.com +vresp.com +courtlistener.com +vub.ac.be +cashnetusaapplynow.com +otnolatrnup.com +patient.info +google.com.bz +indeedassessments.com +fieldengineer.com +timeoutcn.com +moondoge.co.in +startappservice.com +autoevolution.com +ogunhaber.com +bitdefender.net +jibjab.com +clideo.com +trenitalia.com +tmall.hk +hokudai.ac.jp +channelmyanmar.org +dbnnmmxo.com +royalcaribbean.com +digitimes.com +vwg-connect.cn +besthookupwebsites.org +money.com +samsungotn.net +nation.co.ke +tureng.com +appstate.edu +comptia.org +checkout.com +timesfreepress.com +uns.ac.id +audi.com +senat.fr +escapistmagazine.com +comdirect.de +newssc.org +cib.com.cn +marieclaire.com.tw +mca.gov.in +biligame.com +ktvu.com +pinterest.ch +gestyy.com +hamyarwp.com +desire2learn.com +lohud.com +online.de +eiseverywhere.com +rocketnews24.com +wordle.net +opentext.com +xtemos.com +lmgtfy.app +wwu.edu +google.vg +boursorama.com +katmoviehd.se +tuiusuoxue.com +merkur.de +lawtime.cn +arabianbusiness.com +quantummetric.com +thesimpledollar.com +workplace.com +magentocommerce.com +visitscotland.com +rover.com +theamericanconservative.com +providesupport.com +fixya.com +bpb.de +easychair.org +expertpaperwriter.com +cnr.cn +blog.hu +brynmawr.edu +manual.canon +mysynchrony.com +sci-hub.do +in.com +rusprofile.ru +usm.edu +gcu.edu +gopejk.com +vagina.nl +texasmonthly.com +point2homes.com +wikia.org +classiccars.com +osticket.com +optimole.com +lancs.ac.uk +ziraatbank.com.tr +tnaflix.com +colgate.com +yelp.ca +sgs.com +cat.com +wpfr.net +martindale.com +gangde.net +globe.com.ph +kobobooks.com +cryptocompare.com +dowjones.com +reedsy.com +cnstock.com +asiae.co.kr +ntv.io +coinsbit.io +akhbarona.com +seo.com +google.com.gi +providencejournal.com +movistarplus.es +pib.gov.in +pipex.com +bosch.com +admob.com +leukemiatwinklesagacious.com +instrument.com.cn +hotwire.com +wcvb.com +dotesports.com +binary.com +sixflags.com +newduba.cn +pxhere.com +convinceandconvert.com +google.kg +elnabaa.net +tlauncher.org +adobesc.com +kfw.de +weedmaps.com +alphonso.tv +canberratimes.com.au +jumia.ma +harveynichols.com +genieesspv.jp +qconcursos.com +google.com.fj +iisd.org +governo.it +dspunion.com +skyscanner.com +gazette.com +simplypsychology.org +google.sh +traderjoes.com +vseinstrumenti.ru +juksy.com 
+metopera.org +atlanticcouncil.org +36dm.com +nbcmiami.com +roberthalf.com +osf.io +google.rw +sbb.ch +taxheaven.gr +aptoide.com +carview.co.jp +google.co.tz +seemorgh.com +casadellibro.com +ravm.tv +tn.com.ar +canonical.com +nudevista.com +best2020-games-web1.com +digistore24.com +noxinfluencer.com +zqtk.net +armani.com +crossfit.com +netsolhost.com +uni-bielefeld.de +dailyfx.com +tmon.co.kr +microsoftstore.com +sellfy.com +hkex.com.hk +chick-fil-a.com +wechatos.net +dx.com +barstoolsports.com +brownsfashion.com +vpser.net +nuget.org +gotinder.com +distractify.com +ad-score.com +haber61.net +seventeen.com +carscoops.com +greasyfork.org +austlii.edu.au +scdn.vn +almalnews.com +tsn.ua +domain.name +spokeo.com +shipstation.com +tohoku.ac.jp +aparat.cam +chownow.com +myzaker.com +theladders.com +rosettastone.com +pixar.com +prettybrides.net +vix.com +thesundaytimes.co.uk +value-domain.com +cao.go.jp +unisa.edu.au +worldscientific.com +tessabit.com +start.me +groovefunnels.com +teratail.com +ya.ru +p-n.io +openai.com +ren.tv +splice.com +google.ws +zywjw.com +goldprice.org +countryattire.com +ygdy8.net +newser.com +kongfz.com +omicsonline.org +skidrowreloaded.com +appnext.com +littlecdn.com +undocs.org +chinabus.info +cmaj.ca +mb.com.ph +umms.org +zybang.com +songatak.vip +signonsandiego.com +minecraft-mp.com +qiniup.com +montereybayaquarium.org +dailygram.com +alza.cz +state.mi.us +ingramer.com +tuv.com +iktogo.com +k-msedge.net +gymshark.com +megamillions.com +motherearthnews.com +peraichi.com +lidingzhong.com +fqtag.com +revues.org +vnsmart.com.vn +regonline.com +os.tc +vw.com +popmech.ru +newsbeast.gr +placeit.net +privateinternetaccess.com +rebelmouse.com +eepw.com.cn +hao245.com +ocnk.net +physicsworld.com +zocdoc.com +newsis.com +cpuid.com +cash-advanceloan.net +carnegieendowment.org +cosmo.ru +getui.net +anquan.org +pirelli.com +subscribeonandroid.com +wunderlist.com +malaysiakini.com +utep.edu +balkanweb.com +islamweb.net +cartitleloans.biz +wgu.edu +india.gov.in +myasianbride.net +programmableweb.com +r18.com +maersk.com +chathamhouse.org +netease.im +interaction-design.org +legaldaily.com.cn +skai.gr +team-bhp.com +newcastle.edu.au +approved-cash.com +sponichi.co.jp +google.sc +england.nhs.uk +bytetcdn.com +citationmachine.net +payu.com +marketbeat.com +rivm.nl +freenode.net +bentley.com +xn--42c9bsq2d4f7a2a.com +associatedcontent.com +google.so +premier.one +rstudio.com +coinspot.com.au +kali.org +nt.gov.au +mass.edu +larazon.es +google.bt +yingjiesheng.com +shopstyle.co.uk +rfa.org +abcactionnews.com +sony.co.jp +nbabite.com +kogan.com +gdpr-info.eu +rtbsystem.org +censor.net +google.bj +ushistory.org +gvsu.edu +2ch.net +blogg.se +cs.com.cn +istruzione.it +icourse163.org +achilles-ena.com +rentalcars.com +okex.com +google.gg +csai.cn +stamps.com +theclutcher.com +tl88.net +diadona.id +openx.com +carsensor.net +libertaddigital.com +geocities.ws +bd-caict.com +familydoctor.org +visitsealife.com +cdmx.gob.mx +rips.icu +wpsmail.net +valassisdigital.io +nel.goog +bloomberg.co.jp +activemind.de +carto.com +nagoya-u.ac.jp +wp-events-plugin.com +kioerd.com +miamidade.gov +google.co.zw +mn.co +tvpixel.com +menards.com +hcaptcha.com +mtholyoke.edu +stdaily.com +moonbit.co.in +antena3.com +google.vu +fitchratings.com +blogher.com +post.ch +bps.org.uk +getdropbox.com +google.gl +volunteermatch.org +therealreal.com +wmagazine.com +ijinshan.com +ecu.edu +localsaver.com +ard.de +vinted.fr +doda.jp +awsapps.com +fashion-press.net +carbonite.com +gutefrage.net +blogspot.ro +kew.org 
+homeoffice.gov.uk +myus.com +snappfood.ir +hsadspixel.net +personalcapital.com +premiumtimesng.com +lotteon.com +ifc.org +fastspring.com +uncg.edu +umweltbundesamt.de +1and1-editor.com +doleta.gov +google.ml +tailorbrands.com +google.cv +dslreports.com +wemakeprice.com +simplywall.st +disp.cc +juicer.io +kiwibox.com +yhd.com +gulte.com +acsevents.org +brainly.lat +royalessays.co.uk +k2s.cc +filmfreeway.com +rp.pl +ne.gov +prodigygame.com +onthe.io +securedvisit.com +themailorderbride.com +redwap.me +chargebee.com +rtbme24.com +bershka.com +screamingfrog.co.uk +grants.gov +els-cdn.com +worldmarket.com +ilmessaggero.it +casasbahia.com.br +contentsquare.net +xmission.com +sciencing.com +google.dj +haberzamani.com +ppy.sh +sanjesh.org +canterbury.ac.nz +fitsmallbusiness.com +sass-lang.com +valvesoftware.com +pubhtml5.com +riverisland.com +samba.tv +r-ad.ne.jp +elite-brides.com +lidl.de +payumoney.com +abcya.com +redmine.org +google.com.pg +hemingwayapp.com +cookinglight.com +company.site +infineon.com +hanime.tv +podia.com +publicintegrity.org +cimbclicks.com.my +useit.com +bbvanet.com.mx +kontiki.com +edmontonjournal.com +google.mv +ura.news +cash4day.com +patrika.com +sinajs.cn +howtoforge.com +opoxv.com +crossref.org +footlocker.com +behindwoods.com +bloody-disgusting.com +qccoccocmedia.vn +abendblatt.de +bigbasket.com +themepalace.com +5ykj.com +siu.edu +worldpopulationreview.com +gepush.com +cashlandloans.net +cloudmobi.net +thetrevorproject.org +wlp-acs.com +tanja24.com +china-embassy.org +procon.org +ny1.com +indiafcdn.com +stylecraze.com +getadblock.com +etracker.de +liveworksheets.com +epizy.com +laracasts.com +euromonitor.com +bookcrossing.com +codal.ir +createsend.com +jl.gov.cn +ashemaletube.com +whitehatjr.com +gxnews.com.cn +wpml.org +cluodlfare.com +hpanalytics.net +pepperdine.edu +sunysb.edu +cibercuba.com +slader.com +uam.es +chemnet.com +ancient.eu +cookiedatabase.org +epic.org +skribbl.io +hdfcsec.com +v.gd +nettruyen.com +helixsleep.com +museivaticani.va +mediaindonesia.com +uoc.edu +oriflame.com +samcart.com +myinstallmentloans.net +creighton.edu +msocdn.com +nationbuilder.com +test.de +cb2.com +marchofdimes.org +ixxx.com +busuu.com +internetlivestats.com +howard.edu +ac-versailles.fr +joystiq.com +metrotimes.com +hudl.com +psbc.com +indodax.com +sonyericsson.com +sport24.gr +newsbreak.gr +calvin.edu +payeer.com +riverfronttimes.com +epower.cn +hotelscombined.com +famousbirthdays.com +360cities.net +krushmedia.com +aleteia.org +trade.gov +jiangxi.gov.cn +wpscdn.com +getapp.com +inflationbreedinghoax.com +kwsp.gov.my +biologists.org +xhamster4.com +bebo.com +transfermarkt.com +kontakt.az +google.ga +russianqupid.com +oreillynet.com +adelaidenow.com.au +samsungcloudcdn.com +floridatoday.com +asianetnews.com +openclipart.org +jc001.cn +uma.es +lexmark.com +nationmaster.com +tripadvisor.ru +siteimproveanalytics.com +peopleapp.com +unionbankonline.co.in +clickup.com +welivesecurity.com +aa.org +find-your-bride.com +adlooxtracking.com +pubgmobile.com +constitutioncenter.org +merckmanuals.com +360kuai.com +internetworldstats.com +vdo.ai +brides-to-be.com +marines.mil +eb.mil.br +php-fig.org +topix.com +asic.gov.au +jiosaavn.com +shopdisney.com +uakron.edu +bri.co.id +ripple.com +matadornetwork.com +lofter.com +london.edu +bubble.io +thequint.com +internetgundem.com +competethemes.com +whispersystems.org +datingrating.net +rspb.org.uk +google.je +pinknews.co.uk +whatwg.org +bignox.com +pangolin-sdk-toutiao.com +geology.com +backlog.com +abstractfonts.com +forlumineontor.com 
+odysee.com +istv.com.cn +pantheon.io +alwatanvoice.com +google.ms +solidot.org +i-ready.com +emlakgundemi.com.tr +wearesocial.com +google.st +kemenag.go.id +qiyi.com +gaia.com +transip.nl +gamefront.com +modelhub.com +forecast7.com +songkick.com +coinmill.com +parspack.com +shanghai.gov.cn +haodf.com +eib.org +xhamster2.com +finextra.com +glaad.org +france.fr +rim.or.jp +google.mw +phoenixnewtimes.com +seobook.com +google.gp +danfoss.com +practo.com +stitchfix.com +inaturalist.org +bovada.lv +cvut.cz +mubi.com +nykaa.com +etnews.com +hatenadiary.com +safeway.com +mobhey.com +smartbrief.com +up.ac.za +arkansasonline.com +www.gov.hk +nhaccuatui.com +bhf.org.uk +vitacost.com +cgiar.org +hateblo.jp +google.com.sb +carbonbrief.org +bostonmagazine.com +google.cf +dvdvideosoft.com +huduser.gov +csuchico.edu +online-loan.org +jbl.com +mailorderbrides.dating +uncrate.com +kopilkaurokov.ru +ray-ban.com +adr.org +info.gov.hk +sportsnet.ca +sandiego.edu +cmswire.com +image-line.com +michaelkors.com +flattr.com +bilivideo.com +benjerry.com +liberal.gr +i-mobile.co.jp +presstv.com +mediaad.org +openwrt.org +litmus.com +timesofindia.com +babson.edu +samqaicongen.com +picofile.com +powerapps.com +seg-social.es +diariopanorama.com +lycos.co.uk +wsj.net +bamgrid.com +real.de +forbes.com.mx +amna.gr +villanova.edu +t3.com +newsauto.gr +gumtree.pl +onegreenplanet.org +apc.org +lzu.edu.cn +topnaz.com +hdfilmcehennemi2.pw +defimedia.info +dota2.com +top10chinesedatingsites.com +accaglobal.com +google.co.ls +aau.dk +zumiez.com +digitalcameraworld.com +svoboda.org +straightdope.com +fragrantica.ru +gotquestions.org +essaysrescue.com +cloudapp.net +lucid.app +lacoste.com +warnermediacdn.com +sg-host.com +cp4srvng.xyz +cityofchicago.org +hellogiggles.com +redtube.zone +mouseflow.com +google.tl +f5.com +wishpond.com +wright.edu +justgetflux.com +economia.gov.br +wazirx.com +wv.gov +xbytes.ao +impdesk.com +google.sm +ccidnet.com +tecmundo.com.br +textnow.com +chinabyte.com +unito.it +fresnobee.com +greatergood.com +a2hosting.com +nankai.edu.cn +news12.com +extremereach.io +jvzoo.com +fusion.net diff --git a/src/atextcrawler/config.py b/src/atextcrawler/config.py new file mode 100644 index 0000000..0a07727 --- /dev/null +++ b/src/atextcrawler/config.py @@ -0,0 +1,337 @@ +""" +Configuration loader and validator. +""" + +import os +import re +import sys +from io import TextIOBase +from pathlib import Path +from typing import Any, Optional, Union + +from voluptuous import All +from voluptuous import Any as VAny +from voluptuous import Invalid, Length, Range, Required, Schema, Url +from yaml import load + +try: + from yaml import CLoader as Loader # type: ignore +except ImportError: + from yaml import Loader # type: ignore + + +class ConfigError(Exception): + """ + Application configuration error. + """ + + def __init__(self, err): + self.msg = str(err) + + def __str__(self): + return f'Application configuration error: {self.msg}' + + +class Config: + """ + Application configuration. + + Access the full application configuration using :meth:`get`. + + It is a dictionary with these keys: + + * 'directory': the configuration directory being used + * 'main': the main configuration from main.yaml, but + postgresql configuration may be overriden by environment + variable ATEXTCRAWLER_POSTGRESQL + """ + + config = None + + @classmethod + def get( + cls, + out: Optional[TextIOBase] = None, + ) -> Optional[dict]: + """ + Load and validate app configuration if not already done; return it. 
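+
+        A minimal usage sketch::
+
+            config = Config.get()
+            pg_conf = config['postgresql']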
+
+        On errors, print them to *out*; if *out* is sys.stdout, also
+        exit with exit code 2. Otherwise return None.
+        """
+        if cls.config:
+            return cls.config
+        if out is None:
+            out = sys.stdout  # type: ignore
+        _config = _load_config()
+        msg = None
+        if isinstance(_config, ConfigError):
+            msg = f'ERROR: configuration could not be loaded: {_config}'
+        else:
+            config = _validate_config(_config)
+            if isinstance(config, ConfigError):
+                config_dir = _config.get('config_dir')
+                msg = (
+                    f'ERROR: invalid configuration in {config_dir}:'
+                    f' {config}'
+                )
+        if msg is not None:
+            print(msg, file=out)
+            if out == sys.stdout:
+                sys.exit(2)
+            else:
+                return None
+        config['postgresql']['min_size'] = config['crawl']['workers'] + 2
+        config['postgresql']['max_size'] = config['crawl']['workers'] + 2
+        cls.config = config
+        return config
+
+
+def _load_config() -> Union[ConfigError, dict]:
+    """
+    Load configuration; search in multiple directories.
+
+    We search these locations; the first location containing main.yaml
+    will be used::
+
+    * a directory defined in environment variable ATEXTCRAWLER_CONFIG_DIR
+    * subdir .config/atextcrawler in the user's home (`$HOME`)
+    * /etc/atextcrawler
+
+    In the same directory where this main.yaml is located a subdirectory
+    'plugins' must exist and contain the configurations of plugins.
+
+    On failure return a ConfigError. Otherwise return the main
+    configuration (a dict) with these additions:
+
+    * key `config_dir`: the configuration directory being used
+    * key `postgresql`: possibly overridden from the environment
+      (see :func:`_get_env_postgresql`)
+    """
+    config_dirs = []
+    if env_conf := os.environ.get('ATEXTCRAWLER_CONFIG_DIR'):
+        config_dirs.append(Path(env_conf))
+    if env_home := os.environ.get('HOME'):
+        config_dirs.append(Path(env_home) / '.config' / 'atextcrawler')
+    config_dirs.append(Path('/etc/atextcrawler'))
+    for config_dir in config_dirs:
+        main_yaml_path = config_dir / 'main.yaml'
+        if main_yaml_path.exists():
+            break
+    else:
+        locs = ', '.join([str(loc) for loc in config_dirs if loc])
+        msg = (
+            f'Missing main.yaml in all config locations: {locs}\n'
+            f'Hint: You may use environment variable ATEXTCRAWLER_CONFIG_DIR'
+            f' to define a custom config directory.'
+        )
+        return ConfigError(msg)
+
+    # load main.yaml
+    try:
+        with main_yaml_path.open() as main_yaml:
+            main_config = load(main_yaml.read(), Loader=Loader)
+    except Exception as err:
+        return ConfigError(f'Invalid YAML in {main_yaml_path}:\n {err}')
+
+    # main_config must be a dict
+    if not isinstance(main_config, dict):
+        return ConfigError(f'File {main_yaml_path} must contain a dictionary')
+
+    # postgresql config from environment has precedence
+    postgresql_config = _get_env_postgresql()
+    if isinstance(postgresql_config, ConfigError):
+        return postgresql_config
+    main_config['postgresql'] = postgresql_config or main_config['postgresql']
+
+    main_config['config_dir'] = str(config_dir)
+    return main_config
+
+
+def _get_env_postgresql() -> Union[ConfigError, Optional[dict]]:
+    """
+    Load PostgreSQL config from environment variable ATEXTCRAWLER_POSTGRESQL.
+
+    Return an error or the PostgreSQL config (which is None if
+    the environment variable is not defined).
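+
+    Example value (hypothetical credentials)::
+
+        host=localhost database=atextcrawler user=atextcrawler password='my secret'
+
+    Values may be quoted with single or double quotes; host, port and
+    schema_name default to localhost, 5432 and public, respectively.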
+    """
+    env_var = 'ATEXTCRAWLER_POSTGRESQL'
+    value = os.environ.get(env_var, '').strip()
+    if not value:
+        return None
+    param_names = (
+        'host',
+        'port',
+        'database',
+        'user',
+        'password',
+        'schema_name',
+    )
+    re_dsn = re.compile(
+        '((' + '|'.join(param_names) + ')'
+        '=("(((?=[^"\\\\]).|\\\\.)*)"'  # value in double quotes
+        '|\'(((?=[^\'\\\\]).|\\\\.)*)\''  # value in single quotes
+        '|([^"\' ]*)'  # value unquoted
+        ')( |$))+?'
+    )
+    params = {}
+    for _, varname, _, v1, _, v2, _, v3, _ in re_dsn.findall(value):
+        params[varname] = (
+            v3
+            or (v1 or '').replace('\\"', '"')
+            or (v2 or '').replace("\\'", "'")
+        )
+    if 'host' not in params:
+        params['host'] = 'localhost'
+    if 'port' not in params:
+        params['port'] = '5432'
+    if 'schema_name' not in params:
+        params['schema_name'] = 'public'
+    for name in param_names:
+        if name not in params:
+            return ConfigError(
+                f'Missing {name} in environment variable {env_var}'
+            )
+    params['port'] = int(params['port'])
+    return params
+
+
+def _validate_config(config: Any) -> Union[ConfigError, dict]:
+    """
+    Validate the given configuration and fill in default values.
+
+    If invalid, return only the first error.
+    Otherwise return the configuration with added default values.
+    """
+    try:
+        return schema_main(config)
+    except Exception as err:
+        return ConfigError(err)
+
+
+def plugins_dir(config):
+    """
+    Validate the plugins directory (absolute or relative path).
+
+    If it is a relative path, prepend the config_dir.
+    """
+    config_dir = config['config_dir']
+    plugins_dir = config['plugins_dir']
+    if not plugins_dir.startswith('/'):
+        plugins_dir = str(Path(config_dir) / Path(plugins_dir))
+        config['plugins_dir'] = plugins_dir
+    if not (Path(plugins_dir) / '__init__.py').exists():
+        raise Invalid(f'plugins_dir "{plugins_dir}" has no "__init__.py"')
+    return config
+
+
+def postgresql_identifier(value):
+    """
+    Validate a PostgreSQL identifier.
+    """
+    if not isinstance(value, str) or not re.match(
+        '^[a-z][a-z0-9_]{0,30}$', value
+    ):
+        raise Invalid(
+            f'Invalid PostgreSQL identifier "{value}", '
+            'pattern must be: [a-z][a-z0-9_]{0,30}'
+        )
+    return value
+
+
+def positive_number(value):
+    """
+    Validate a positive number (int or float).
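+
+    For example, `5` and `0.5` pass validation, while `0`, `-1` and
+    `'5'` raise `Invalid`.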
+ """ + if (isinstance(value, int) or isinstance(value, float)) and value > 0: + return value + raise Invalid('Not a positive number') + + +schema_postgresql = Schema( + { + Required('host'): All(str, Length(min=1)), + Required('port', default=5432): All(int, Range(min=0, max=65535)), + Required('database'): All(str, Length(min=1)), + Required('user'): All(str, Length(min=1)), + Required('password'): str, + Required('schema_name', default='public'): postgresql_identifier, + } +) + + +schema_crawl = Schema( + { + Required('workers', default=10): All(int, Range(min=0, max=1000)), + Required('site_delay', default=600): positive_number, + Required('site_revisit_interval', default=3600): positive_number, + Required('resource_delay', default=5): positive_number, + Required('full_crawl_interval', default=864000): positive_number, + Required('feed_crawl_interval', default=86400): positive_number, + } +) + + +schema_elasticsearch = Schema( + { + Required('host'): All(str, Length(min=1)), + Required('api_key'): All(str, Length(min=1)), + Required('id'): All(str, Length(min=1)), + Required('index_base_name'): All(str, Length(min=1)), + } +) + + +schema_tensorflow = Schema( + { + Required('model_server_endpoint'): Url(), + } +) + + +schema_main = Schema( + All( + { + Required('config_dir'): All(str, Length(min=1)), + Required( + 'instance_name', default='atextcrawler' + ): postgresql_identifier, + Required('instance_type', default='prod'): VAny( + 'dev', + 'staging', + 'prod', + ), + Required('log_level', default='info'): VAny( + 'critical', + 'error', + 'warning', + 'info', + 'debug', + ), + Required('plugins_dir', default='plugins'): All( + str, Length(min=1) + ), + Required('postgresql'): schema_postgresql, + Required('crawl'): schema_crawl, + Required('elasticsearch'): schema_elasticsearch, + Required('tensorflow'): schema_tensorflow, + }, + plugins_dir, + ) +) + + +if __name__ == '__main__': + from pprint import pprint + + pprint(Config().get()) diff --git a/src/atextcrawler/crawl.py b/src/atextcrawler/crawl.py new file mode 100644 index 0000000..323e454 --- /dev/null +++ b/src/atextcrawler/crawl.py @@ -0,0 +1,215 @@ +""" +Crawl a site. +""" + +import logging +from datetime import datetime + +import aiohttp + +from .models import Crawl +from .resource import ResourceFetcher, get_site_path, process_site_path +from .site import ( + RobotsInfo, + checkin_site, + checkout_site, + fetch_feeds, + process_site, + update_site, +) +from .tensorflow import TensorFlow + +logger = logging.getLogger(__name__) + + +class CrawlWorker: + """ + Worker fetching sites, crawling their resources and storing statistics. + """ + + def __init__(self, app, worker_number, pool): + self.app = app + self.worker_number = worker_number + self.pool = pool + self.site_delay = self.app.config['crawl']['site_delay'] + self.resource_delay = self.app.config['crawl']['resource_delay'] + self.site = None + self.crawl = None + self.running = True # do crawl + + def __await__(self): + return self.__ainit__().__await__() + + async def __ainit__(self): + await self.startup() + return self + + async def startup(self): + """ + Asynchronous startup. + """ + logger.info(f'Starting worker {self.worker_number}') + self.conn = await self.pool.acquire() + self.session = aiohttp.ClientSession() + self.fetcher = ResourceFetcher(self.session) + self.tf = TensorFlow(self.app, self.session) + + async def shutdown(self): + """ + Asynchronous shutdown. 
+ """ + logger.info(f'Shutting down worker {self.worker_number}') + await self.session.close() + await self.pool.release(self.conn) + + async def run(self): + """ + Worker loop: fetch a site, crawl its resources and store statistics. + + If no site needs to be crawled, sleep for self.site_delay seconds + (configured in crawl.site_delay). + """ + await self.app.sleep(2) + while self.app.running and self.running: + self.site, is_full, more = await checkout_site(self.app, self.conn) + if not self.site: + msg = f'Worker {self.worker_number}: sites exhausted' + logger.debug(msg) + if not more: + await self.app.sleep(self.site_delay) + continue + self.crawl = await get_or_create_crawl( + self.conn, self.site.id_, is_full + ) + try: + if is_full: + site_upd, _ = await update_site( + self.app, + self.fetcher, + self.conn, + self.site.base_url, + site=self.site, + ) + if site_upd and site_upd.crawl_enabled: + self.site = site_upd + await process_site( + self.fetcher, + self.conn, + self.site, + ) + elif self.site.crawl_enabled: + await fetch_feeds(self.fetcher, self.conn, self.site) + if self.site.crawl_enabled: + await self.crawl_resources() + except: + msg = ( + f'Worker {self.worker_number} failed crawl' + f' {self.crawl.id_} of site {self.site.id_}' + f' ({self.site.base_url})' + ) + logger.exception(msg) + await self.crawl.finish( + self.conn, self.app.running and self.running + ) + await checkin_site(self.app, self.conn, self.site, self.crawl) + msg = ( + f'Worker {self.worker_number} finished crawl' + f' {self.crawl.id_}' + ) + logger.debug(msg) + self.site = None + # if we were cancelled, but the app is still running, run again + if self.app.running: + self.running = True + msg = f'Closing crawler {self.worker_number}' + logger.debug(msg) + + async def crawl_resources(self): + """ + Loop over resources of the site and process them. Collect statistics. + + All workers operate on distinct sites, so no need for locking here. 
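+
+        The delay between two resource fetches defaults to the configured
+        crawl.resource_delay; if the site's robots.txt specifies a crawl
+        delay, that one is used instead.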
+ """ + crawl_type = 'full' if self.crawl.is_full else 'feed' + msg = ( + f'Worker {self.worker_number} beginning' + f' {crawl_type} crawl {self.crawl.id_}' + f' of site {self.site.id_} ({self.site.base_url})' + ) + logger.info(msg) + resource_delay = self.resource_delay + robots = await RobotsInfo(self.site.base_url) + if robots.delay: + resource_delay = robots.delay + while self.app.running and self.running: + site_path = await get_site_path( + self.conn, + self.site, + self.crawl.t_begin, + only_new=not self.crawl.is_full, + ) + if not site_path: + msg = ( + f'Worker {self.worker_number} ending crawl' + f' {self.crawl.id_}: paths exhausted' + ) + logger.info(msg) + return + try: + sp_filter = self.app.plugins['filter_site_path'].sp_filter + if sp_filter(self.site, site_path.path, robots): + is_new_resource = await process_site_path( + self.app, + self.worker_number, + self.conn, + self.fetcher, + self.tf, + self.site, + site_path, + ) + if is_new_resource: + self.crawl.n_resources_new += 1 + if is_new_resource is not None: + self.crawl.n_resources += 1 + await self.app.sleep(resource_delay) + else: + sql = ( + "UPDATE site_path SET" + " last_visit=now() at time zone 'UTC'," + " filtered=true" + " WHERE id=$1" + ) + await self.conn.execute(sql, site_path.id_) + except: + msg = ( + f'Worker {self.worker_number} processing path failed' + f' in crawl {self.crawl.id_}: {site_path}' + ) + logger.exception(msg) + site_path.ok_count -= 1 + await site_path.save(self.conn) + msg = ( + f'Worker {self.worker_number}: stopped crawl' f' {self.crawl.id_}' + ) + logger.info(msg) + + +async def get_or_create_crawl(conn, site_id, is_full=True) -> Crawl: + """ + Return a new or existing+unfinished crawl. + + If an existing crawl is found, return it, disregarding whether + it is a full crawl or not. + """ + sql = "SELECT * FROM crawl WHERE site_id=$1 AND t_end is null LIMIT 1" + if row := await conn.fetchrow(sql, site_id): + return await Crawl().load_from_row(row) + else: + # create a new crawl + crawl = Crawl( + site_id=site_id, + is_full=is_full, + t_begin=datetime.utcnow(), + ) + await crawl.save(conn) + return crawl diff --git a/src/atextcrawler/db.py b/src/atextcrawler/db.py new file mode 100644 index 0000000..89bf1c4 --- /dev/null +++ b/src/atextcrawler/db.py @@ -0,0 +1,162 @@ +""" +PostgreSQL connectivity. + +PGPool can be used as context manager. It takes postgresql configuration +parameters and gives a connection pool. +""" + +import logging +import sys +from io import TextIOBase +from pathlib import Path +from traceback import format_exc +from typing import Dict + +import asyncpg + +from .utils.json import json_dumps, json_loads + +logger = logging.getLogger(__name__) + + +class PGPool: + """ + Database connectivity: Provide a connection pool. + + Can be used either as async context manager (giving a pool), + or as a class using async init and the shutdown method and + having the pool attribute. + + After startup self.pool contains a PostgreSQL connection pool + (instance of :class:`asyncpg.pool.Pool`). + + Startup also runs schema migrations (cf. directory `migrations`). 
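+
+    Minimal usage sketch (assuming *postgresql_config* holds valid
+    connection parameters)::
+
+        async with PGPool(postgresql_config) as pool:
+            async with pool.acquire() as conn:
+                await conn.execute('SELECT 1')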
+    """
+
+    def __init__(
+        self,
+        postgresql_config: dict,
+        out: TextIOBase = None,
+        check: bool = True,
+    ) -> None:
+        self.conf = postgresql_config
+        self.out = out or sys.stdout
+        self.check = check
+        self.pool = None
+
+    def __await__(self):
+        return self.__ainit__().__await__()
+
+    async def __ainit__(self):
+        await self.__aenter__()
+        return self
+
+    async def __aenter__(self):
+        """
+        Return the connection pool after an optional check.
+
+        The check tests basic database access and runs missing migrations.
+        If the check fails, return None.
+        """
+        pool_params = {
+            key: val
+            for key, val in self.conf.items()
+            if key
+            in (
+                'host',
+                'port',
+                'database',
+                'user',
+                'password',
+                'max_size',
+                'min_size',
+            )
+        }
+        pool_params['command_timeout'] = 30
+        self.pool = await asyncpg.create_pool(**pool_params, init=self._init)
+        if self.check:
+            async with self.pool.acquire() as conn:
+                if await self.check_or_migrate(conn):
+                    return self.pool
+
+    @staticmethod
+    async def _init(conn) -> None:
+        """
+        Add JSON encoding and decoding to the given connection.
+        """
+        await conn.set_type_codec(
+            'jsonb',
+            encoder=json_dumps,
+            decoder=json_loads,
+            schema='pg_catalog',
+        )
+
+    async def __aexit__(self, exc_type, exc, tb) -> None:
+        """
+        Close the connection pool.
+        """
+        await self.shutdown()
+
+    async def shutdown(self):
+        """
+        Close the pool.
+        """
+        await self.pool.close()
+
+    async def check_or_migrate(self, conn: asyncpg.Connection) -> bool:
+        """
+        Check database connectivity and run missing schema migrations.
+
+        Return whether the database is usable.
+        """
+        row = await conn.fetchrow('SELECT 1+1 AS result')
+        if not row or row.get('result') != 2:
+            msg = 'Database SELECT 1+1 not working; missing privileges?'
+            print(msg, file=self.out)
+            logger.critical(msg)
+            return False
+
+        # determine current schema_version
+        try:
+            sql = "SELECT value::int FROM kvs WHERE key='schema_version'"
+            schema_version = await conn.fetchval(sql)
+        except Exception:  # e.g. table kvs does not exist yet
+            schema_version = 0
+
+        # run missing migrations
+        migrations = get_migrations()
+        for number, text in sorted(migrations.items()):
+            if number > schema_version:
+                cmds = text.split('\n----\n')
+                for cmd in cmds:
+                    if not cmd.strip():
+                        continue
+                    try:
+                        await conn.execute(cmd)
+                    except Exception:
+                        msg = (
+                            f'Exception during migration {number} in '
+                            f'statement\n{cmd}'
+                        )
+                        print(msg, file=self.out)
+                        logger.critical(msg)
+                        print(format_exc(), file=self.out)
+                        logger.critical(format_exc())
+                        return False
+
+        # return success
+        return True
+
+
+def get_migrations() -> Dict[int, str]:
+    """
+    Return migrations (number and text content of migration file).
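+
+    Migration files are expected in the `migrations` directory next to
+    this module, named `<number>.sql` (e.g. `1.sql`); the statements in
+    a file are separated by lines containing just `----`.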
+    """
+    migrations_dir = Path(__file__).parent / 'migrations'
+    migrations = {}
+    for migration_file in migrations_dir.glob('*.sql'):
+        migration_number = int(migration_file.name[:-4])
+        with migration_file.open() as mig_file:
+            content = mig_file.read()
+        migrations[migration_number] = content
+    return migrations
diff --git a/src/atextcrawler/migrations/1.sql b/src/atextcrawler/migrations/1.sql
new file mode 100644
index 0000000..0c4053f
--- /dev/null
+++ b/src/atextcrawler/migrations/1.sql
@@ -0,0 +1,297 @@
+CREATE TABLE kvs (
+    id bigserial PRIMARY KEY,
+    t_update timestamp NOT NULL DEFAULT (now() at time zone 'utc'),
+    key varchar(200) NOT NULL UNIQUE,
+    value jsonb
+)
+----
+COMMENT ON COLUMN kvs.t_update IS 'Time of last update or insert of the entry';
+----
+COMMENT ON COLUMN kvs.key IS 'Key';
+----
+COMMENT ON COLUMN kvs.value IS 'Value';
+----
+COMMENT ON TABLE kvs IS 'Simple key-value store';
+----
+INSERT INTO kvs (key, value) VALUES ('schema_version', '1');
+----
+CREATE TABLE site (
+    id bigserial PRIMARY KEY,
+    canonical_url varchar(200),
+    base_url varchar(200) NOT NULL,
+    base_urls varchar(200)[] NOT NULL,
+    domains varchar(100)[],
+    ips inet[] NULL,
+    crawl_enabled bool NOT NULL DEFAULT false,
+    crawl_active bool NOT NULL DEFAULT false,
+    next_full_crawl timestamp,
+    next_feed_crawl timestamp,
+    last_update timestamp,
+    last_pub timestamp,
+    pub_dates jsonb NOT NULL DEFAULT '{}'::jsonb,
+    langs char(2)[] NOT NULL DEFAULT ARRAY[]::varchar(2)[],
+    alt_langs jsonb NOT NULL DEFAULT '{}'::jsonb,
+    title varchar(200),
+    description varchar(2000),
+    keywords varchar(50)[] NOT NULL DEFAULT ARRAY[]::varchar(50)[],
+    linkbacks jsonb NOT NULL DEFAULT '{}'::jsonb,
+    meta_info jsonb NOT NULL DEFAULT '{}'::jsonb,
+    boilerplate_texts jsonb NOT NULL DEFAULT '[]'::jsonb
+)
+----
+CREATE INDEX site__base_url ON site (base_url)
+----
+CREATE INDEX site__base_urls ON site (base_urls)
+----
+CREATE INDEX site__domains ON site (domains)
+----
+CREATE INDEX site__ips ON site (ips)
+----
+CREATE INDEX site__next_full_crawl ON site (next_full_crawl)
+----
+CREATE INDEX site__next_feed_crawl ON site (next_feed_crawl)
+----
+CREATE INDEX site__langs ON site (langs)
+----
+CREATE INDEX site__title ON site (title)
+----
+CREATE INDEX site__description ON site (description)
+----
+CREATE INDEX site__keywords ON site (keywords)
+----
+COMMENT ON COLUMN site.base_url IS 'Preferred base URL (from column base_urls)'
+----
+COMMENT ON COLUMN site.base_urls IS 'Base URLs that have been found to return the same content'
+----
+COMMENT ON COLUMN site.domains IS 'Domains that have been found to return the same content'
+----
+COMMENT ON COLUMN site.ips IS 'IPv4 or IPv6 addresses of the hostnames in base_urls'
+----
+COMMENT ON COLUMN site.crawl_enabled IS 'Whether the site should be indexed'
+----
+COMMENT ON COLUMN site.crawl_active IS 'Whether a crawl of the site is in progress'
+----
+COMMENT ON COLUMN site.next_full_crawl IS 'Crawl all resources of this site again after this instant of time; do not crawl if null'
+----
+COMMENT ON COLUMN site.next_feed_crawl IS 'Crawl the feed resources of this site again after this instant of time; do not crawl if null'
+----
+COMMENT ON COLUMN site.last_update IS 'Time of last update of this site (in this database)'
+----
+COMMENT ON COLUMN site.last_pub IS 'Estimated time of last content publication on the site'
+----
+COMMENT ON COLUMN site.pub_dates IS 'Change history: map visit date to estimated publication date'
+----
+COMMENT ON COLUMN site.langs IS 'Languages of the site (ISO 639-1 codes)'
+----
+COMMENT ON COLUMN site.alt_langs IS 'Map links to alternative language versions of the site to ISO 639-1 language codes'
+----
+COMMENT ON COLUMN site.title IS 'Title as obtained from title tag or meta tags'
+----
+COMMENT ON COLUMN site.description IS 'Description as obtained from meta tags'
+----
+COMMENT ON COLUMN site.keywords IS 'Keywords as obtained from meta tags'
+----
+COMMENT ON COLUMN site.linkbacks IS 'Map URL to type of linkback (cf. https://en.wikipedia.org/wiki/Linkback)'
+----
+COMMENT ON COLUMN site.meta_info IS 'Values from meta tags and other meta information'
+----
+COMMENT ON COLUMN site.boilerplate_texts IS 'Boilerplate texts on the startpage and other sample pages'
+----
+COMMENT ON TABLE site IS 'Website'
+----
+CREATE TABLE site_queue (
+    id bigserial PRIMARY KEY,
+    src bigint NULL REFERENCES site(id) ON DELETE CASCADE,
+    url varchar(200) NOT NULL,
+    link_text varchar(100),
+    t_create timestamp NOT NULL DEFAULT (now() at time zone 'utc')
+)
+----
+CREATE INDEX site_queue__url ON site_queue (url)
+----
+COMMENT ON COLUMN site_queue.src IS 'The id of the linking site; null in case of seeds or manual additions'
+----
+COMMENT ON COLUMN site_queue.url IS 'Base URL of site to be assessed, ending with a slash or a mandatory base path'
+----
+COMMENT ON COLUMN site_queue.link_text IS 'Text under the anchor tag on the source site'
+----
+COMMENT ON COLUMN site_queue.t_create IS 'Creation time of this entry'
+----
+COMMENT ON TABLE site_queue IS 'Queued site URLs'
+----
+CREATE TABLE site_feed (
+    id bigserial PRIMARY KEY,
+    site_id bigint NOT NULL REFERENCES site(id) ON DELETE CASCADE,
+    url varchar(200) NOT NULL,
+    etag text,
+    modified varchar(50),
+    t_visit timestamp,
+    t_content timestamp,
+    version varchar(10),
+    title varchar(200),
+    description text,
+    fail_count smallint NOT NULL DEFAULT 0
+)
+----
+CREATE INDEX site_feed__site ON site_feed (site_id)
+----
+CREATE INDEX site_feed__t_content ON site_feed (t_content)
+----
+COMMENT ON COLUMN site_feed.site_id IS 'Id of the site on which this feed was found'
+----
+COMMENT ON COLUMN site_feed.url IS 'URL of the feed'
+----
+COMMENT ON COLUMN site_feed.etag IS 'Etag obtained when requesting the feed'
+----
+COMMENT ON COLUMN site_feed.modified IS 'Last-Modified HTTP header value obtained when requesting the feed'
+----
+COMMENT ON COLUMN site_feed.t_visit IS 'Time of last retrieval of the feed; null before first retrieval'
+----
+COMMENT ON COLUMN site_feed.t_content IS 'Time of last content update; null before first retrieval'
+----
+COMMENT ON COLUMN site_feed.version IS 'Version of the feed; null before first retrieval'
+----
+COMMENT ON COLUMN site_feed.title IS 'Title of the feed; null before first retrieval'
+----
+COMMENT ON COLUMN site_feed.description IS 'Description of the feed; null before first retrieval'
+----
+COMMENT ON COLUMN site_feed.fail_count IS 'Number of failed retrievals after last successful retrieval; zero before first retrieval'
+----
+CREATE TABLE site_link (
+    id bigserial PRIMARY KEY,
+    src bigint NOT NULL REFERENCES site(id) ON DELETE CASCADE,
+    dst bigint NOT NULL REFERENCES site(id) ON DELETE CASCADE,
+    t_create timestamp NOT NULL DEFAULT (now() at time zone 'utc'),
+    link_text varchar(100)
+)
+----
+ALTER TABLE site_link ADD CONSTRAINT site_link_edge UNIQUE (src, dst)
+----
+CREATE INDEX site_link__src ON site_link (src)
+----
+CREATE INDEX site_link__dst ON site_link (dst)
+----
+COMMENT ON COLUMN site_link.src IS 'Source site'
+----
+COMMENT ON COLUMN site_link.dst IS 'Destination site'
+----
+COMMENT ON COLUMN site_link.t_create IS 'Time of creation of this entry'
+----
+COMMENT ON COLUMN site_link.link_text IS 'Text under the anchor tag on the source site'
+----
+COMMENT ON TABLE site_link IS 'Cross-site link'
+----
+CREATE TABLE resource (
+    id bigserial PRIMARY KEY,
+    simhash bigint,
+    content_type varchar(50),
+    last_change timestamp,
+    text_len int,
+    lang char(2),
+    title varchar(200),
+    summary varchar(2000)
+)
+----
+COMMENT ON COLUMN resource.simhash IS 'Simhash of the text content of the resource'
+----
+COMMENT ON COLUMN resource.content_type IS 'Content type extracted from Content-Type HTTP header'
+----
+COMMENT ON COLUMN resource.last_change IS 'Estimated time of the last update of this resource'
+----
+COMMENT ON COLUMN resource.text_len IS 'Length of the extracted text in characters'
+----
+COMMENT ON COLUMN resource.lang IS 'Language ISO 639-1 code'
+----
+COMMENT ON COLUMN resource.title IS 'Title of the resource (used for feed resources)'
+----
+COMMENT ON COLUMN resource.summary IS 'Content summary of the resource (used for feed resources)'
+----
+COMMENT ON TABLE resource IS 'Text resource (may be reachable by more than one path of a site)'
+----
+CREATE TABLE site_path (
+    id bigserial PRIMARY KEY,
+    site_id bigint NOT NULL REFERENCES site(id) ON DELETE CASCADE,
+    path varchar(400) NOT NULL,
+    last_visit timestamp,
+    filtered bool NOT NULL DEFAULT false,
+    ok_count smallint NOT NULL DEFAULT 0,
+    canonical bool,
+    resource_id bigint REFERENCES resource(id) ON DELETE CASCADE
+)
+----
+ALTER TABLE site_path ADD CONSTRAINT site_path__unique UNIQUE (site_id, path)
+----
+CREATE INDEX site_path__site_path ON site_path (site_id, path)
+----
+CREATE INDEX site_path__resource ON site_path (resource_id)
+----
+COMMENT ON COLUMN site_path.site_id IS 'Site id'
+----
+COMMENT ON COLUMN site_path.path IS 'Path'
+----
+COMMENT ON COLUMN site_path.last_visit IS 'Time of last retrieval of the resource; null before first retrieval'
+----
+COMMENT ON COLUMN site_path.ok_count IS 'Increased by 1 for every successful retrieval of the resource and decreased by 1 for every failed one'
+----
+COMMENT ON COLUMN site_path.canonical IS 'Whether the path is the canonical one for the resource; null before first retrieval'
+----
+COMMENT ON COLUMN site_path.resource_id IS 'Resource id; null before first retrieval'
+----
+COMMENT ON TABLE site_path IS 'Paths of a site pointing to text resources'
+----
+CREATE TABLE crawl (
+    id bigserial PRIMARY KEY,
+    site_id bigint NOT NULL REFERENCES site(id) ON DELETE CASCADE,
+    is_full bool NOT NULL DEFAULT false,
+    t_begin timestamp,
+    t_end timestamp,
+    n_resources int NOT NULL DEFAULT 0,
+    n_resources_new int NOT NULL DEFAULT 0
+)
+----
+CREATE INDEX crawl__site ON crawl (site_id)
+----
+CREATE INDEX crawl__t_begin ON crawl (t_begin)
+----
+COMMENT ON COLUMN crawl.site_id IS 'Site that is being crawled'
+----
+COMMENT ON COLUMN crawl.is_full IS 'Whether the crawl is a full crawl; if not it is a feed crawl'
+----
+COMMENT ON COLUMN crawl.t_begin IS 'Begin time of the crawl'
+----
+COMMENT ON COLUMN crawl.t_end IS 'End time of the crawl; if t_end is null resuming a crawl will fetch all resources with last_visit before t_begin'
+----
+COMMENT ON COLUMN crawl.n_resources IS 'Number of resources that were fetched during the crawl'
+----
+COMMENT ON COLUMN crawl.n_resources_new IS 'Number of new resources found during the crawl'
+----
+COMMENT ON TABLE crawl IS 'Crawl of resources on a site'
+----
+CREATE TYPE
site_annotation_type AS ENUM ('whitelist', 'blacklist', 'suggestion', 'review', 'audience', 'location', 'themes', 'timescale') +---- +COMMENT ON TYPE site_annotation_type IS 'Type of site annotation' +---- +CREATE TABLE site_annotation ( + id bigserial PRIMARY KEY, + site_id bigint REFERENCES site(id) ON DELETE SET NULL, + base_url varchar(200) NOT NULL, + ann_type site_annotation_type NOT NULL, + ann_content JSONB, + t_update timestamp NOT NULL DEFAULT (now() at time zone 'utc') +) +---- +CREATE INDEX site_annotation__site ON site_annotation (site_id) +---- +CREATE INDEX site_annotation__base_url ON site_annotation (base_url) +---- +COMMENT ON COLUMN site_annotation.site_id IS 'Site that is being annotated' +---- +COMMENT ON COLUMN site_annotation.base_url IS 'Base URL of the site being annotated' +---- +COMMENT ON COLUMN site_annotation.ann_type IS 'Annotation type' +---- +COMMENT ON COLUMN site_annotation.ann_content IS 'Annotation content' +---- +COMMENT ON COLUMN site_annotation.t_update IS 'Time of last update' +---- +COMMENT ON TABLE site_annotation IS 'Manual annotations on a site' diff --git a/src/atextcrawler/models.py b/src/atextcrawler/models.py new file mode 100644 index 0000000..934b791 --- /dev/null +++ b/src/atextcrawler/models.py @@ -0,0 +1,610 @@ +""" +Data Models. +""" + +import logging +from dataclasses import InitVar, asdict, dataclass, field, fields +from datetime import date, datetime +from itertools import chain +from typing import Any, ClassVar, Optional + +import tldextract +from asyncpg import Connection + +from .search import delete_resource +from .utils.durl import Durl, get_url_variants +from .utils.link import extract_domain +from .utils.similarity import get_simhash, simhash_to_bigint + +logger = logging.getLogger(__name__) + + +class ModelBase: + """ + Abstract base class for models. + + Execute SQL to load, save, delete instances using asyncpg. + """ + + table: ClassVar + id_: Optional[int] = 0 + + async def load(self, conn: Connection, id_: int) -> Optional[Any]: + """ + If loading fails, return None. + """ + sql = f"SELECT * FROM {self.table} WHERE id=$1" + row = await conn.fetchrow(sql, id_) + if not row: + return None + return await self.load_from_row(row) + + async def load_from_row(self, row): + """ + If row is None, return None. + """ + if not row: + return None + data = dict(row) + self.id_ = data.pop('id') + self.__init__(**data) + return self + + async def save(self, conn: Connection) -> None: + """ + Save the instance (update if self.id_ is set, else insert). + """ + data = asdict(self) + # logger.debug(f'Save {self}: id_={self.id_}') + if self.id_: # update + cols = ', '.join(data.keys()) + upds = ', '.join( + [f'{col}=${i + 1}' for i, col in enumerate(data.keys())] + ) + val_id = f'${len(data) + 1}' + sql = f"UPDATE {self.table} SET {upds} WHERE id={val_id}" + await conn.execute(sql, *data.values(), self.id_) + else: # insert + cols = ', '.join(data.keys()) + vals = ', '.join([f'${i + 1}' for i in range(len(data))]) + sql = ( + f"INSERT INTO {self.table} ({cols}) VALUES ({vals})" + f" RETURNING id" + ) + self.id_ = await conn.fetchval(sql, *data.values()) + + def asdict(self): + """ + Return instance data as dictionary. + """ + return asdict(self) + + async def delete(self, conn: Connection) -> None: + """ + Delete the object if it has an id_. + """ + if self.id_: + sql = f"DELETE FROM {self.table} WHERE id=$1" + await conn.execute(sql, self.id_) + + +class ResourceError: + """ + Error encountered while trying to fetch a resource. 
+    """
+
+    def __init__(self, msg, status=None, headers=None):
+        self.msg = msg
+        self.status = status
+        self.headers = headers
+
+    def __repr__(self):
+        return f'ResourceError: {self.msg}'
+
+
+class ResourceRedirect:
+    """
+    A resource containing a redirect.
+    """
+
+    def __init__(self, urls):
+        self.urls = urls
+
+
+@dataclass
+class TextResource(ModelBase):
+    """
+    TextResource (without path).
+
+    TextResource models web resources with relevant text content.
+    They are instantiated in modules page, document, ...; their metadata
+    are stored in table `resource` and the text content is stored with the
+    search engine.
+
+    Do not confuse with SitePath: Several SitePath instances
+    may point to a TextResource. The TextResource holds the actual content.
+
+    If we are not dealing with the startpage of a new site,
+    the init_fields dict usually will contain the site to which
+    the resource belongs.
+    """
+
+    table: ClassVar = 'resource'
+    init_fields: InitVar[dict] = None  # additional fields after fetching
+    search_fields: InitVar[dict] = None  # additional fields for indexing
+
+    # database fields
+    simhash: Optional[int] = None
+    content_type: Optional[str] = None
+    last_change: Optional[datetime] = None
+    text_len: int = 0
+    lang: Optional[str] = None
+    title: Optional[str] = None
+    summary: Optional[str] = None
+
+    def __post_init__(self, init_fields, search_fields):
+        if init_fields is None:
+            init_fields = {}
+        self.init_fields = init_fields
+        if search_fields is None:
+            search_fields = {}
+        self.search_fields = search_fields
+        self.site = self.init_fields.get('site')
+        self.site_id = self.site.id_ if self.site else None
+        self._update_simhash()
+
+    def __str__(self):
+        return (
+            f'TextResource(id={self.id_},'
+            f' site_id={self.site_id},'
+            f' type={self.content_type})'
+        )
+
+    def _update_simhash(self):
+        """
+        Update the simhash of the resource from its text content.
+        """
+        if self.simhash is None:
+            text = self.search_fields.get('text', '')
+            self.simhash = simhash_to_bigint(get_simhash(text))
+
+    async def save(self, conn: Connection):
+        """
+        Save the instance, extending the parent's method.
+        """
+        self.content_type = (
+            self.content_type[:50] if self.content_type else None
+        )
+        self.title = self.title[:200] if self.title else None
+        self.summary = self.summary[:400] if self.summary else None
+        self._update_simhash()
+        if self.last_change is None:
+            self.last_change = datetime.utcnow()
+        await super().save(conn)
+
+    async def update_from_resource(self, upd: 'TextResource'):
+        """
+        Update self with values from another resource.
+        """
+        names = [field.name for field in fields(self)]
+        for name in names:
+            cur_val = getattr(self, name)
+            upd_val = getattr(upd, name)
+            if not cur_val and upd_val is not None:
+                setattr(self, name, upd_val)
+        self.init_fields = upd.init_fields
+        self.search_fields = upd.search_fields
+
+
+@dataclass
+class MetaResource(ModelBase):
+    """
+    Parent class for Feed, Sitemap and SitemapIndex.
+
+    Instances of Sitemap and SitemapIndex are not stored. Note: class
+    Feed contains feed meta data and is stored in the database.
+    """
+
+
+@dataclass
+class SitemapIndex(MetaResource):
+    """
+    A SitemapIndex meta resource.
+
+    Just a list of the sitemap URLs, nothing more.
+    """
+
+    sitemaps: list = field(default_factory=list)
+
+
+@dataclass
+class Sitemap(MetaResource):
+    """
+    A Sitemap meta resource.
+
+    Just a list of the resulting links, nothing more.
+    """
+
+    urls: list = field(default_factory=list)
+
+
+@dataclass
+class Feed(MetaResource):
+    """
+    A site's feed (RSS, Atom, ...).
+    """
+
+    table: ClassVar = 'site_feed'
+    entries: InitVar[list] = None
+    site_id: Optional[int] = None
+    url: Optional[str] = None
+    etag: Optional[str] = None
+    modified: Optional[str] = None
+    t_visit: Optional[datetime] = None
+    t_content: Optional[datetime] = None
+    version: Optional[str] = None
+    title: Optional[str] = None
+    description: Optional[str] = None
+    fail_count: int = 0
+
+    def __post_init__(self, entries):
+        self.entries = entries
+
+    def __str__(self):
+        return f'Feed(id={self.id_}, site_id={self.site_id}, url={self.url})'
+
+    async def save(self, conn: Connection):
+        """
+        Save, trying to merge with existing entry matching on site_id and url.
+        """
+        if not self.site_id or not self.url:
+            msg = 'Saving feed failed: missing site_id or url'
+            logger.error(msg)
+            return
+        sql = "SELECT id FROM site_feed WHERE site_id=$1 AND url=$2"
+        self.id_ = await conn.fetchval(sql, self.site_id, self.url)
+        await super().save(conn)
+
+    def debug(self) -> str:
+        """
+        Return the instance data as a string for debug print output.
+        """
+        return (
+            f'Feed:\n'
+            f'- id: {self.id_}\n'
+            f'- site_id: {self.site_id}\n'
+            f'- url: {self.url}\n'
+            f'- etag: {self.etag}\n'
+            f'- modified: {self.modified}\n'
+            f'- t_visit: {self.t_visit}\n'
+            f'- t_content: {self.t_content}\n'
+            f'- version: {self.version}\n'
+            f'- title: {self.title}\n'
+            f'- description: {self.description}\n'
+            f'- fail_count: {self.fail_count}\n'
+            f'- entries: {self.entries}'
+        )
+
+
+@dataclass
+class Site(ModelBase):
+    """
+    Website.
+ """ + + table: ClassVar = 'site' + base_durl: InitVar[Durl] = None + feeds: InitVar[dict] = None + links_ext: InitVar[dict] = None + links_int: InitVar[dict] = None + startpage_text: InitVar[str] = None + + canonical_url: Optional[str] = None + base_url: Optional[str] = None + base_urls: list[str] = field(default_factory=list) + domains: list[str] = field(default_factory=list) + ips: Optional[list[str]] = None + crawl_enabled: bool = False + crawl_active: bool = False + next_full_crawl: Optional[datetime] = None + next_feed_crawl: Optional[datetime] = None + last_update: Optional[datetime] = None + last_pub: Optional[datetime] = None + pub_dates: Optional[dict[str, str]] = None + langs: list[str] = field(default_factory=list) + alt_langs: dict[str, str] = field(default_factory=dict) + title: Optional[str] = None + description: Optional[str] = None + keywords: list[str] = field(default_factory=list) + linkbacks: dict[str, str] = field(default_factory=dict) + meta_info: dict = field(default_factory=dict) + boilerplate_texts: list[str] = field(default_factory=list) + + def __post_init__( + self, + base_durl: Durl, + feeds=None, + links_ext=None, + links_int=None, + startpage_text=None, + ): + self.feeds = feeds + self.links_ext = links_ext + self.links_int = links_int + self.startpage_text = startpage_text + self.keywords = self.keywords[:20] + if not self.last_update: + self.last_update = datetime.utcnow() + pub_date: Optional[str] + if self.last_pub: + pub_date = date.isoformat(self.last_pub.date()) + self.pub_dates = {date.isoformat(self.last_update): pub_date} + else: + pub_date = None + self.pub_dates = {} + if base_durl: + self.base_urls = [base_durl.url()[:200]] + self.domains = [extract_domain(base_durl.hostname)[:100]] + + def __str__(self): + return ( + f'Site(id={self.id_}, url={self.base_url},' + f' crawl_enabled={self.crawl_enabled})' + ) + + async def update_base_url(self) -> None: + """ + Update the base_url, choosing the most relevant URL. + + If canonical_url is not None, use this. + Otherwise set self.base_url to the shortest from self.base_urls, + but requiring a https-url if there is at least one. + """ + if self.canonical_url and self.canonical_url not in self.base_urls: + if canonical_durl := await Durl(self.canonical_url): + self.base_urls.append(self.canonical_url) + domain = extract_domain(canonical_durl.hostname) + if domain not in self.domains: + self.domains.append(domain) + if self.canonical_url: + self.base_url = self.canonical_url + return + if not self.base_url: + url_candidates = self.base_urls + if https_urls := [ + url for url in self.base_urls if url.startswith('https://') + ]: + url_candidates = https_urls + self.base_url = min(url_candidates, key=len) + + async def save( # type: ignore + self, conn, merge=True + ) -> tuple[Optional[int], bool]: + """ + Store the site, optionally trying to merge it with an existing site. + + Return the id of the saved instance and whether a new instance + was created. + + If self.id_ is not 0, replace the data of the existing site with + this id. Else if not merge, store as new row, and if merge, + try to merge with an existing matching site. 
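+
+        Merging unions the base_urls and domains of both sites, keeps
+        the existing site's id_ and fills in a missing canonical_url.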
+        """
+        await self.update_base_url()
+        if not merge:
+            created = not bool(self.id_)
+            await super().save(conn)
+            return self.id_, created
+        if self.id_:
+            sql = "SELECT base_urls, pub_dates FROM site WHERE id=$1"
+            row = await conn.fetchrow(sql, self.id_)
+            self.base_urls = list(
+                set(row['base_urls']).union(set(self.base_urls))
+            )
+            if previous_pub_dates := row['pub_dates']:
+                if not self.pub_dates:
+                    self.pub_dates = {}
+                self.pub_dates.update(previous_pub_dates)
+            await super().save(conn)
+            return self.id_, False
+        same_site = None
+        if same_site_id := await search_same_site(self, conn):
+            same_site = await Site().load(conn, same_site_id)
+        if same_site:
+            same_site.base_urls = list(
+                set(same_site.base_urls).union(set(self.base_urls))
+            )
+            same_site.domains = list(
+                set(same_site.domains).union(set(self.domains))
+            )
+            if self.canonical_url and not same_site.canonical_url:
+                same_site.canonical_url = self.canonical_url
+            await same_site.save(conn, merge=False)  # save without merging
+            self.id_ = same_site.id_
+            return self.id_, False
+        else:
+            await super().save(conn)
+            return self.id_, True
+
+
+@dataclass
+class SitePath(ModelBase):
+    """
+    Path of a website. May point to a Resource.
+    """
+
+    table: ClassVar = 'site_path'
+    site: InitVar[str] = None
+
+    site_id: Optional[int] = None
+    path: Optional[str] = None
+    filtered: bool = False
+    last_visit: Optional[datetime] = None
+    ok_count: int = 0
+    canonical: Optional[bool] = None
+    resource_id: Optional[int] = None
+
+    def __str__(self):
+        return (
+            f'SitePath(id={self.id_}, site_id={self.site_id},'
+            f' path={self.path})'
+        )
+
+    async def save(self, conn: Connection):
+        """
+        Save the instance, extending the parent's method.
+        """
+        self.path = self.path[:400] if self.path else ''
+        await super().save(conn)
+
+    async def unlink_resource(self, conn, engine, index_base_name):
+        """
+        Unlink the resource and also delete it, if it has no more links.
+        """
+        if self.id_:
+            if self.resource_id:
+                sql = "SELECT COUNT(*) FROM site_path WHERE resource_id=$1"
+                ref_count = await conn.fetchval(sql, self.resource_id)
+                if ref_count == 0:
+                    sql = (
+                        "DELETE FROM resource WHERE id=$1"
+                        " RETURNING (true, lang)"
+                    )
+                    found = await conn.fetchval(sql, self.resource_id)
+                    if found:
+                        await delete_resource(
+                            engine, found[1], self.resource_id
+                        )
+            self.resource_id = None
+
+    def url(self, site):
+        """
+        Return the full URL (combine the site's base_url with our path).
+        """
+        return site.base_url + self.path
+
+
+@dataclass
+class Crawl(ModelBase):
+    """
+    The crawl process of a website (begin, end, statistics, ...).
+    """
+
+    table: ClassVar = 'crawl'
+    site_id: Optional[int] = None
+    is_full: bool = False
+    t_begin: datetime = field(default_factory=datetime.utcnow)
+    t_end: Optional[datetime] = None
+    n_resources: int = 0
+    n_resources_new: int = 0
+
+    async def finish(self, conn, set_t_end):
+        """
+        Save the crawl. Set t_end only if indicated.
+        """
+        if set_t_end:
+            self.t_end = datetime.utcnow()
+        await self.save(conn)
+
+
+async def search_same_site(
+    site: Site,
+    conn: Connection,
+) -> Optional[int]:
+    """
+    Try to find a matching site for the given *site* and return its id.
+
+    TODO: if the path is non-trivial, require it also for the matching site
+
+    Two sites match when they return the same content for identical paths.
+    The base_url (scheme and/or netloc) may differ.
+    We do not have the content for all paths of both websites, so we need
+    to estimate: We only take into account meta information from the
+    start pages of both sites, in particular the title, description
+    and information obtained from the base_urls:
+
+    We use a combination of these conditions:
+
+    1. one of the sites has a canonical URL which matches the
+       URL of the other site
+    2. the content fields (title, description) have sufficient information
+    3. the content fields match exactly
+    4. the domain matches
+    5. the domain matches, except for the TLD
+    6. the base_urls differ in their schemes (http vs. https)
+    7. the hostnames in the base_urls are identical
+    8. the hostnames in the base_urls differ by a prepended 'www.'
+    9. the IPs have at least one common address
+
+    The algorithm is this (first answer is final, yes means match):
+
+    * if (1) : yes
+    * if (2), (3), (4) : yes
+    * if (2), (3), (5), (9) : yes
+    * if (6), ((7) or (8)) : yes
+    * no
+    """
+    # rule (1)
+    if site.canonical_url:
+        sql = "SELECT id FROM site WHERE $1=ANY(base_urls) LIMIT 1"
+        id_ = await conn.fetchval(sql, site.canonical_url)
+        if id_:
+            return id_
+    else:
+        sql = "SELECT id FROM site WHERE canonical_url=ANY($1) LIMIT 1"
+        id_ = await conn.fetchval(sql, site.base_urls)
+        if id_:
+            return id_
+
+    # rule (6), ((7) or (8))
+    url_variants = list(
+        set(
+            chain.from_iterable(
+                get_url_variants(base_url) for base_url in site.base_urls
+            )
+        )
+    )
+    sql = "SELECT id FROM site WHERE base_urls && $1 LIMIT 1"
+    if id_ := await conn.fetchval(sql, url_variants):
+        return id_
+
+    # condition (2)
+    if len(site.title or '') > 15 or len(site.description or '') > 15:
+        sql = (
+            "SELECT * FROM site WHERE"
+            " COALESCE(title, '')=$1 AND COALESCE(description, '')=$2"
+        )
+        rows = await conn.fetch(sql, site.title or '', site.description or '')
+        # condition (3)
+        if rows:
+            # condition (4)
+            for row in rows:
+                domains = set(row.get('domains', []))
+                if domains & set(site.domains):
+                    return row['id']
+            # condition (9)
+            for row in rows:
+                ips = set(row.get('ips', []))
+                if site.ips and ips & set(site.ips):
+                    # condition (5)
+                    domains_ = row.get('domains', [])
+                    d1 = set([tldextract.extract(d).domain for d in domains_])
+                    domains_ = site.domains or []
+                    d2 = set([tldextract.extract(d).domain for d in domains_])
+                    if d1 & d2:
+                        return row['id']
+
+    return None
diff --git a/src/atextcrawler/plugin_defaults/__init__.py b/src/atextcrawler/plugin_defaults/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/src/atextcrawler/plugin_defaults/filter_resource_path.py b/src/atextcrawler/plugin_defaults/filter_resource_path.py
new file mode 100644
index 0000000..cb84e22
--- /dev/null
+++ b/src/atextcrawler/plugin_defaults/filter_resource_path.py
@@ -0,0 +1,22 @@
+"""
+Filter paths found in a resource.
+
+This plugin implements :func:`rp_filter`.
+"""
+
+from typing import Optional
+
+
+def rp_filter(site, durl) -> Optional[str]:
+    """
+    Adjust or filter found paths (may depend on site).
+
+    To filter out a path (i.e., not add it to table `site_path`)
+    return None.
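+
+    For example, paths ending in `.jpg` or `.png` are filtered out,
+    and a trailing `?amp=1` is stripped, so `/article?amp=1` becomes
+    `/article`.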
+ """ + path = durl.pwa() + # skip fetching images (linked from a tags; img tags are skipped anyway) + if path.lower().endswith('.jpg') or path.lower().endswith('.png'): + return None + path = path.removesuffix('?amp=1') + return path diff --git a/src/atextcrawler/plugin_defaults/filter_site.py b/src/atextcrawler/plugin_defaults/filter_site.py new file mode 100644 index 0000000..09b2282 --- /dev/null +++ b/src/atextcrawler/plugin_defaults/filter_site.py @@ -0,0 +1,47 @@ +""" +Relevance estimation of sites. + +This plugin implements :func:`site_filter`. +""" + +import re + +from atextcrawler.models import Site + +MIN_RELEVANCE_SCORE = 5 + + +async def site_filter(site: Site) -> bool: + """ + Assess relevance of the site (using language-dependent criteria). + + If the site shall be crawled, return True, else False. + """ + # limit to sites in English or German language + if not set(['de', 'en']) & set(site.langs): + return False + score = 0.0 + for crit_name, weight, langs, crit_re in re_criteria: + if '*' in langs or set(langs) & set(site.langs): + findings = crit_re.findall(site.startpage_text) + if findings: + score += weight * len(findings) + if site.title and crit_re.search(site.title): + score += 4 * weight + if site.description and crit_re.search(site.description): + score += 4 * weight + + # TODO: add criteria for named entities (FdA-IFA, FAU, ...) + + return score >= MIN_RELEVANCE_SCORE + + +re_criteria = { + ( + 'anarch', + 1.0, + ('*',), + re.compile('((? bool: + """ + Per-site path filter. Return whether the path shall be retrieved. + """ + if not robots.can_fetch_url(site.base_url + path): + return False + if 'amusewiki' in site.meta_info.get('generator', '').lower(): + if any( + [ + path.endswith(end) + for end in ('.html', '.epub', '.tex', '.zip', '.pdf') + ] + ): + return False + if '/bbselect?' in path: + return False + return True diff --git a/src/atextcrawler/resource/__init__.py b/src/atextcrawler/resource/__init__.py new file mode 100644 index 0000000..f6aee1d --- /dev/null +++ b/src/atextcrawler/resource/__init__.py @@ -0,0 +1,10 @@ +from .dedup import store_boilerplate_texts +from .feed import feed_types, update_feed +from .fetch import ResourceFetcher +from .operations import ( + add_site_paths, + get_site_path, + process_site_path, + store_feed_entries, +) +from .sitemap import extract_sitemap_paths, get_sitemap_urls diff --git a/src/atextcrawler/resource/__main__.py b/src/atextcrawler/resource/__main__.py new file mode 100644 index 0000000..1542dfd --- /dev/null +++ b/src/atextcrawler/resource/__main__.py @@ -0,0 +1,96 @@ +""" +Dev tool for fetching and displaying a resource. + +Has no permanent effects. +""" + +import asyncio +import logging +import sys +from collections import defaultdict +from pprint import pformat + +import aiohttp + +from ..models import Feed, TextResource +from ..resource import ResourceFetcher +from ..utils.annotation import pack_annotations, unpack_annotations +from ..utils.durl import Durl + +logger = logging.getLogger() +logger.setLevel(logging.DEBUG) +logger.addHandler(logging.StreamHandler()) +logger_page_debug = logging.getLogger('atextcrawler.resource.page.debug') +logger_page_debug.setLevel(logging.DEBUG) + + +def add_tags(text, annotations): + """ + Reconstruct html from text and annotations. + + This is very similar to what the client does when displaying + a cached hit. 
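+
+    *annotations* is expected to contain 'tags' (mapping span tuples
+    (i, f) to lists of tag names) and 'links' (mapping each href to an
+    (i, f, rel) triple), as produced by the annotation utilities.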
+ """ + html = '' + opening_tags = defaultdict(list) + closing_tags = defaultdict(list) + anns_tags = sorted( + annotations['tags'].items(), key=lambda x: (x[0][0], -x[0][1]) + ) + for (i, f), anns in anns_tags: + opening_tags[i] += [tag for tag in reversed(anns)] + closing_tags[f] += [tag for tag in reversed(anns)] + positions = sorted(set(opening_tags.keys()) | set(closing_tags.keys())) + last_pos = 0 + links = {i: href for href, (i, f, rel) in annotations['links'].items()} + for pos in positions: + html += text[last_pos:pos] + closing = closing_tags.get(pos, []) + opening = opening_tags.get(pos, []) + common = set(closing) & set(opening) + closing = [tag for tag in closing if tag not in common] + opening = [tag for tag in opening if tag not in common] + tags_html = '' + for tag in reversed(closing): + html += f'\n' + for tag in opening: + if tag == 'a': + href = links.get(pos, '#') + html += f'' + else: + html += f'<{tag}>' + last_pos = pos + return html + + +async def run(): + """ + Fetch and display a resource with URL given as cmdline argument. + """ + url = sys.argv[1] + async with aiohttp.ClientSession() as session: + if not (durl := await Durl(url)): + return + fetcher = ResourceFetcher(session) + resource = await fetcher.fetch(url) + if isinstance(resource, TextResource): + logger.warning(repr(resource)) + logger.warning(f'Language: {resource.lang}') + logger.warning(pformat(resource.search_fields)) + logger.warning(pformat(resource.init_fields)) + + # annotations = resource.search_fields.get('annotations') + # text = resource.search_fields['text'] + # with open('/tmp/1.html', 'w') as f: + # html = add_tags(text, annotations) + # f.write(f'\nhhh' + # f'\n{html}\n') + elif isinstance(resource, Feed): + logger.warning(resource.debug()) + else: + logger.warning(f'Resource has type {type(resource)}') + logger.warning(resource) + + +if __name__ == '__main__': + asyncio.run(run()) diff --git a/src/atextcrawler/resource/dedup.py b/src/atextcrawler/resource/dedup.py new file mode 100644 index 0000000..54998dc --- /dev/null +++ b/src/atextcrawler/resource/dedup.py @@ -0,0 +1,59 @@ +""" +Find boilerplate texts. +""" + +from collections import Counter + +from ..models import TextResource +from ..utils.probe import extract_samples +from ..utils.section import iter_sections + + +async def store_boilerplate_texts(fetcher, conn, site): + """ + Find and store boilerplate texts of a site. + + Fetch the start page and internal sample links obtained from it. + If there are sufficienty frequently appearing text sections, + consider them as boilerplate texts. + + If boilerplate_texts were found, update the given site instance. 
+ """ + startpage = await fetcher.fetch(site.base_url, site=site) + if ( + not isinstance(startpage, TextResource) + or startpage.content_type != 'html' + ): + return + + # fetch sample resources + sample_links = extract_samples(startpage.init_fields['links_int']) + resources = [startpage] + for sample_link in sample_links: + if sample_link.path == site.base_url: # avoid duplicate resources + continue # NB: duplicate resources may have different paths + sample_resource = await fetcher.fetch(sample_link.url(), site=None) + if ( + isinstance(sample_resource, TextResource) + and sample_resource.content_type == 'html' + ): + resources.append(sample_resource) + + # find common texts in resources + if (n_resources := len(resources)) > 2: + text_freq = Counter() + for resource in resources: + text = resource.search_fields['text'] + semantic_breaks = resource.search_fields['annotations'][ + 'semantic_breaks' + ] + for sec in iter_sections(text, semantic_breaks): + text_freq[sec[3]] += 1 + boilerplate_texts = [] + if min(text_freq.values() or [0]) == 1: # no resource fetched twice + for text, freq in text_freq.items(): + if freq > 2: + boilerplate_texts.append(text) + sql = "UPDATE site SET boilerplate_texts=$1 WHERE id=$2" + await conn.execute(sql, boilerplate_texts, site.id_) + site.boilerplate_texts = boilerplate_texts diff --git a/src/atextcrawler/resource/document.py b/src/atextcrawler/resource/document.py new file mode 100644 index 0000000..4284465 --- /dev/null +++ b/src/atextcrawler/resource/document.py @@ -0,0 +1,131 @@ +""" +Parse documents (often application/pdf). +""" + +import logging +import re +from datetime import datetime +from typing import Optional, Union + +from tika import parser + +from ..models import ResourceError, ResourceRedirect, Site, TextResource +from ..utils.durl import Durl +from ..utils.http import get_header_links +from ..utils.lang import extract_content_language +from .plaintext import annotate_text + +logger = logging.getLogger(__name__) +logger_debug = logging.getLogger(__name__ + '.debug') +logger_debug.setLevel(logging.INFO) + + +re_url = re.compile( + r'((http|https)://[\w_-]+\.[\w_-]+(:[0-9]+)?' + r'([\w.,@?^=%&:/~+#-]*[\w@?^=%&/~+#-])?)' +) + + +async def parse_document( + durl: Durl, + resp: dict, + site: Optional[Site], +) -> Optional[Union[TextResource, ResourceError, ResourceRedirect]]: + """ + Extract plain text from documents in various formats. 
+ """ + content = resp['content'] + + # HTTP headers, canonical URL, shortlink + header_links = await get_header_links(resp['headers'], durl, site) + if canonical := header_links.get('canonical'): + if canonical != durl.url(): + return ResourceRedirect(resp['redirects'] + [canonical]) + shortlink = header_links.get('shortlink') + + # use tika to extract text + doc = parser.from_buffer(content) + # logger.debug(pformat(doc)) + if doc.get('status') != 200: + msg = f'Analyzing document failed: {durl.url()}' + return ResourceError(msg) + + # collect meta data + meta = doc.get('metadata', {}) + content_type = meta.get('Content-Type') + if isinstance(content_type, list): + content_type = content_type[-1] + title = concat(meta.get('title')) + concat(meta.get('creator')) + last_change = extract_latest(meta.get('date') or meta.get('created')) + keywords = None + + # text content + text = (doc.get('content') or '').strip() + + # links + links_int: dict[Durl, tuple[list[str], str]] = {} + links_ext: dict[Durl, tuple[list[str], str]] = {} + for url in re_url.findall(text): + link_durl = await Durl(url[0]) + if link_durl: + if link_durl.site() == durl.site(): + links_int[link_durl] = [], link_durl.url() + else: + links_ext[link_durl] = [], link_durl.url() + + # annotations + text, annotations = annotate_text(text) + + return TextResource( + content_type=content_type, + last_change=last_change, + text_len=len(text), + lang=extract_content_language(text), + title=title, + init_fields={ + 'durl': durl, + 'site': site, + 'headers': resp['headers'], + 'redirects': resp['redirects'], + 'links_int': links_int, + 'links_ext': links_ext, + 'shortlink': shortlink, + 'canonical': None, + }, + search_fields={ + 'title': title, + 'pub_date': last_change, + 'keywords': keywords, + 'text': text, + 'annotations': annotations, + }, + ) + + +def extract_latest(s: Optional[Union[str, list]]) -> Optional[datetime]: + """ + Extract the lastest date (if any) from a string or list of strings. + """ + if not s: + return None + if not isinstance(s, list): + s = [s] + dt = [] + for t in s: + try: + dt.append(datetime.fromisoformat(t.rstrip('Z'))) + except: + pass + return max(dt) if dt else None + + +def concat(s: Optional[Union[str, list]]) -> Optional[str]: + """ + Helper function for joining strings together. + """ + if not s: + return None + if not isinstance(s, list): + s = [s] + return ' '.join(s) diff --git a/src/atextcrawler/resource/feed.py b/src/atextcrawler/resource/feed.py new file mode 100644 index 0000000..c7713bd --- /dev/null +++ b/src/atextcrawler/resource/feed.py @@ -0,0 +1,155 @@ +""" +Stuff related to feeds. + +Higher-level stuff is in site.feeds. +""" + +import logging +from datetime import datetime, timezone +from typing import Optional, Union + +from asyncpg import Connection +from feedparser import parse + +from ..models import Feed, MetaResource, ResourceError +from ..utils.durl import Durl + +logger = logging.getLogger(__name__) + + +feed_types = ( + 'application/rss+xml', + 'application/atom+xml', + 'application/feed+json', +) + + +async def update_feed(fetcher, feed, conn) -> Optional[list[dict]]: + """ + Fetch, parse and return a given feed's content. Also update *feed*. + + If the server replied with HTTP 410, delete the feed. + If there is no new information (server replied with HTTP 304), + return None. For other errors also return None and increase the + fail_count. 
+ """ + headers = {'Cache-control': 'max-age=600'} + if feed.modified: + headers['If-Modified-Since'] = feed.modified + elif feed.etag: + headers['If-None-Match'] = feed.etag.removeprefix('W/') + resource = await fetcher.fetch(feed.url, headers=headers) + if isinstance(resource, ResourceError): + if resource.status == 410: + msg = f'Feed has vanished, deleting it: {feed}' + logger.debug(msg) + await feed.delete(conn) + if resource.status != 304: + feed.fail_count += 1 + if feed.fail_count > 5: + msg = f'Feed not reachable, deleting it: {feed}' + logger.debug(msg) + await feed.delete(conn) + return None # HTTP 304, no new entries + elif isinstance(resource, Feed): + resource.id_ = feed.id_ + resource.site_id = feed.site_id + await resource.save(conn) + return resource.entries + else: + return None + + +def parse_json_feed(resp, data: dict) -> Feed: + """ + Parse a JSON response for jsonfeed information. + + TODO: handle 'next_url' (see https://jsonfeed.org/version/1.1) + """ + feed = Feed() + feed.url = data.get('feed_url', resp['redirects'][-1]) + feed.etag = resp['headers'].get('ETag') + feed.modified = resp['headers'].get('Last-Modified') + feed.t_visit = datetime.utcnow() + version = data.get('version', '') + version = 'json-' + version.removeprefix('https://jsonfeed.org/version/') + feed.version = version[:10] + feed.title = data.get('title') + feed.description = data.get('description') + feed.fail_count = 0 + entries = [] + latest = None + # parse feed entries to a dict compatible with feedparser's entries + for feed_item in data.get('items', []): + entry = {} + entry['link'] = feed_item.get('url') + dt = feed_item.get('date_published') + if dt: + dt = datetime.fromisoformat(dt) if dt else None + dt = dt.astimezone(tz=None).replace(tzinfo=timezone.utc) + entry['published_parsed'] = dt.timetuple() + entry['title'] = feed_item.get('title') + entry['summary'] = feed_item.get('summary') + entries.append(entry) + if dt: + latest = max(latest or dt, dt) + feed.entries = entries + feed.t_content = latest + return feed + + +def parse_xml_feed(resp) -> Union[Feed, ResourceError]: + """ + Parse a response from Fetcher.get_resp() for xml feed information. + """ + feed = Feed() + feed.url = resp['redirects'][-1] + feed.etag = resp['headers'].get('ETag') + feed.modified = resp['headers'].get('Last-Modified') + feed.t_visit = datetime.utcnow() + try: + parsed = parse(resp['content'], response_headers=resp['headers']) + except Exception as error: + return ResourceError(f'Feedparser error: {error}') + latest = parsed['feed'].get('updated_parsed') + if latest: + latest = datetime(*latest[:6]) + feed.t_content = max(feed.t_content or latest, latest) + feed.version = parsed['version'] + feed.title = parsed['feed'].get('title', '')[:200] or None + feed.description = parsed['feed'].get('description') + feed.fail_count = 0 + feed.entries = parsed['entries'] + return feed + + +def convert_feed_entries( + base_url: Optional[str], + entries: list[dict], +) -> tuple[ + list[tuple[str, bool]], + dict[str, tuple[Optional[str], Optional[str], Optional[str]]], +]: + """ + Extract paths and resource meta information from a feed's entries. + + Return paths in a structure wanted by :func:`add_site_paths` and + resource meta information in a structure wanted by + :func:`update_resource_meta`. 
+ """ + paths = [] + resource_meta = {} + for entry in entries: + if entry.get('link') and entry['link'].startswith(base_url or ''): + path = entry['link'].removeprefix(base_url or '').lstrip('/') + if len(path) <= 200: + last_update = entry.get('published_parsed') + if last_update: + last_update = datetime(*last_update[:6]) + paths.append((path, True)) + resource_meta[path] = ( + last_update, + entry.get('title', '')[:200] or None, + entry.get('summary', '')[:2000] or None, + ) + return paths, resource_meta diff --git a/src/atextcrawler/resource/fetch.py b/src/atextcrawler/resource/fetch.py new file mode 100644 index 0000000..f1f0f8f --- /dev/null +++ b/src/atextcrawler/resource/fetch.py @@ -0,0 +1,327 @@ +""" +Access to a resource specified by a URL. +""" + +import gzip +import logging +from json import loads +from traceback import format_exc +from typing import Any, Optional, Union + +import aiohttp +from bs4 import BeautifulSoup + +from ..models import ( + Feed, + MetaResource, + ResourceError, + ResourceRedirect, + Site, + TextResource, +) +from ..utils.durl import Durl +from ..utils.link import in_blacklist +from .document import parse_document +from .feed import parse_json_feed, parse_xml_feed +from .page import parse_html +from .plaintext import parse_plaintext +from .sitemap import parse_sitemap, parse_sitemapindex + +logger = logging.getLogger(__name__) + + +MAX_REDIRECTS = 10 +""" +Maximum number of redirects to follow. +""" + + +default_headers = { + 'User-Agent': 'Mozilla/5.0 (X11; Linux aarch64; rv:78.0)' + ' Gecko/20100101 Firefox/78.0', + 'DNT': '1', + 'Upgrade-Insecure-Requests': '1', + 'Accept-Language': 'en-US,en;q=0.5, *;q=0.5', +} +""" +Default HTTP client headers, overwriting those of aiohttp.ClientSession. +""" + + +blacklist_content_types = [ + '', + 'application/ogg', +] +""" +Blacklist for content-types. +""" + + +text_content_types = { + 'text/html': 'html', + 'text/plain': 'plain', + 'application/rss+xml': 'feed-rss', + 'application/atom+xml': 'feed-atom', + 'application/feed+json': 'feed-json', + 'application/json': 'json', + 'application/xml': 'xml', + 'text/xml': 'xml', +} +""" +Map content-types to parsers. +""" + + +class ResourceFetcher: + """ + Fetch a resource specified by a URL (:meth:`fetch`). + + The timeout is the same for all requests. + """ + + def __init__( + self, + session: aiohttp.ClientSession, + timeout_sock_connect: Union[int, float] = 8, + timeout_sock_read: Union[int, float] = 30, + ): + self.session = session + self.timeout = aiohttp.ClientTimeout( + sock_connect=timeout_sock_connect, sock_read=timeout_sock_read + ) + + async def fetch( + self, + url: str, + site: Optional[Site] = None, + redirect_history: Optional[list[str]] = None, + headers: Optional[dict] = None, + ) -> Union[ + None, MetaResource, TextResource, ResourceError, ResourceRedirect + ]: + """ + Try to fetch a resource and return an instance or error or redirect. + + If an error was encountered, return a ResourceError. + If the resource has an irrelevant content type, return None. + Otherwise return a specific content instance. + + Argument *redirect_history* contains the redirect history; + if one of the redirects is encountered again, return None. 
+ """ + if redirect_history is None: + redirect_history = [] + if not (durl := await Durl(url)): + return ResourceError('Invalid URL') + resp = await self.get_resp( + durl, + redirect_history=redirect_history, + headers=headers, + ) + if isinstance(resp, ResourceError): + return resp + if resp is None: + return None + result = await self._parse(durl, site, resp) + if isinstance(result, (MetaResource, TextResource)): + result.id_ = None + return result + + async def _parse( + self, durl, site, resp, in_recursion=False + ) -> Union[ + None, MetaResource, TextResource, ResourceError, ResourceRedirect + ]: + """ + Parse a response. May call itself. + """ + result: Union[ + None, MetaResource, TextResource, ResourceError, ResourceRedirect + ] = None + content = resp['content'] + if isinstance(content, str) and content.startswith(' Optional[Union[ResourceError, dict]]: + """ + Try to fetch a url returning a ResourceError or a dict with content. + + Optional *headers* will overwrite the :var:`default_headers`. + + If the response status is not 200, always return an ResourceError. + + If the content-type is not relevant (see blacklist_content_types), + return None. + + The dict contains these keys+values: + + * 'parser': a hint on the parser to use for analyzing the content; + one of 'html', 'plain', 'feed', 'xml', 'application' + * 'content': bytes for type application, otherwise str + * 'redirects': a list of URLs visited during HTTP redirection, + the last item is the final URL + * 'headers': response headers + """ + if redirect_history is None: + redirect_history = [] + if len(redirect_history) >= MAX_REDIRECTS: + return None + headers_ = default_headers.copy() + if headers: + headers_.update(headers) + try: + async with self.session.get( + durl.url(), + headers=headers_, + timeout=self.timeout, + ) as resp: + redirects = [durl.url()] + if resp.history: + href = resp.history[-1].headers.get('location') + if not href or not (redurl := await Durl(href, base=durl)): + msg = 'Invalid URL after HTTP redirect' + return ResourceError(msg) + if in_blacklist(redurl.hostname): + src_url = ( + redirect_history[0] + if redirect_history + else durl.url() + ) + msg = ( + f'Dropping URL {src_url}, since' + f' redirected to a blacklisted site' + ) + logger.debug(msg) + return None + redirects = [str(r.url) for r in resp.history] + redirects.append(redurl.url()) + if join := set(redirect_history) & set(redirects): + msg = f'Cyclic redirect {join}' + return ResourceError(msg) + if resp.status != 200: + msg = f'HTTP status {resp.status}' + return ResourceError( + msg, status=resp.status, headers=headers + ) + c_type = resp.headers.get('content-type', '').split(';')[0] + if c_type in blacklist_content_types: + return None + result: dict[str, Any] = { + 'redirects': redirect_history + redirects, + 'headers': resp.headers, + } + if c_type in text_content_types.keys(): + try: # catch decoding issues + content = await resp.text() + except: + body = await resp.read() + encoding = resp.charset or 'utf-8' + encoding = encoding.replace('CP-1250', 'cp1250') + content = body.decode(encoding, errors='replace') + result['content'] = content + result['parser'] = text_content_types[c_type] + return result + elif c_type.startswith('application/'): + result['content'] = await resp.read() + result['parser'] = 'application' + return result + except aiohttp.ClientError as error: + # on certificate error try without tls + if 'SSLCertVerificationError' in str(error): + if durl.scheme == 'https': + url = durl.url() + 
durl.replace_scheme('http') + response = await self.get_resp( + durl=durl, + headers=headers, + redirect_history=redirect_history + [url], + ) + if not isinstance(response, ResourceError): + return response + msg = f'ClientError: {error}' + return ResourceError(msg) + except Exception as error: + msg = f'Unknown error: {error}:\n{format_exc()}' + logger.error(msg) + return ResourceError(msg) + return None + + +async def parse_xml( + durl: Durl, + response: dict, + rss=False, + atom=False, +) -> Optional[Union[MetaResource, ResourceError]]: + """ + Parse XML content. + + In particular, parse sitemapindex, sitemap, RSS feed, atom feed. + """ + try: + xml = response['content'] + soup = BeautifulSoup(xml, 'html.parser') + except: + return None + if rss or (rss := soup.find('rss')): + return parse_xml_feed(response) + elif atom or (atom := soup.find('atom')): + return parse_xml_feed(response) + elif sitemapindex := soup.find('sitemapindex'): + return parse_sitemapindex(sitemapindex) + elif urlset := soup.find('urlset'): + return parse_sitemap(urlset) + else: + return None + + +async def parse_json( + durl: Durl, + response: dict, +) -> Optional[Union[Feed, ResourceError]]: + """ + Parse the content of JSON feeds. + """ + try: + data = loads(response['content']) + except: + msg = f'Could not parse JSON from {durl.url()}' + logger.debug(msg) + return None + if not isinstance(data, dict): + return None + if data.get('version', '').startswith('https://jsonfeed.org/'): + return parse_json_feed(response, data) + return None diff --git a/src/atextcrawler/resource/operations.py b/src/atextcrawler/resource/operations.py new file mode 100644 index 0000000..dffe2bc --- /dev/null +++ b/src/atextcrawler/resource/operations.py @@ -0,0 +1,347 @@ +""" +Operations on resources. +""" + +import logging +from datetime import datetime +from typing import Optional, Sequence + +from asyncpg import Connection + +from ..models import ( + Feed, + MetaResource, + ResourceError, + Site, + Sitemap, + SitemapIndex, + SitePath, + TextResource, +) +from ..search import delete_resource, index_resource +from ..tensorflow import TensorFlow +from ..utils.durl import Durl +from ..utils.similarity import ( + create_simhash, + search_simhash, + simhash_from_bigint, + simhash_to_bigint, +) +from .feed import convert_feed_entries +from .fetch import ResourceFetcher +from .sitemap import extract_sitemap_paths + +logger = logging.getLogger(__name__) + + +async def add_site_paths( + conn: Connection, + site_id: int, + paths: Sequence[tuple[str, Optional[bool]]], +) -> None: + """ + Add site paths. if resource infos are given, also create resources. + + The paths must be given as relative paths and together with a boolean + telling whether the link is a canonical link. + """ + sql = ( + "INSERT INTO site_path (site_id, path, canonical)" + " VALUES ($1, $2, $3) ON CONFLICT (site_id, path) DO NOTHING" + ) + values = ( + (site_id, path, canonical) + for path, canonical in paths[:100000] + if len(path) <= 400 + ) + await conn.executemany(sql, values) + + +async def update_resource_meta( + conn: Connection, + site_id: int, + resource_meta: dict, +) -> None: + """ + Update meta information of existing resources using path to find them. 
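+
+    *resource_meta* maps paths to (last_change, title, summary)
+    triples as returned by :func:`convert_feed_entries`, e.g.
+    (hypothetical)::
+
+        {'blog/post-1': (datetime(2021, 11, 29, 9, 16, 31),
+                         'Post 1', None)}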
+ """ + sql = ( + "UPDATE resource SET last_change=coalesce($1, last_change)," + " title=coalesce($2, title), summary=coalesce($3, summary) FROM (" + " SELECT resource_id FROM site_path WHERE site_id=$4 AND path=$5" + ") sp WHERE resource.id=sp.resource_id" + ) + values = ((*meta, site_id, path) for path, meta in resource_meta.items()) + await conn.executemany(sql, values) + + +async def store_feed_entries( + conn: Connection, + site: Site, + entries: list[dict], +) -> None: + """ + Add missing resources of a site from given feed entries. + """ + if site.id_: + paths, resource_meta = convert_feed_entries(site.base_url, entries) + await add_site_paths(conn, site.id_, paths) + await update_resource_meta(conn, site.id_, resource_meta) + + +async def get_site_path( + conn: Connection, + site: Site, + before: datetime, + only_new=False, +) -> Optional[SitePath]: + """ + Return the next path of a given site that needs to be processed. + + If none needs to be processed, return None. + + Only return paths that have last been visited before *before* + or not been processed at all. Paths with a ok_count of -3 or lower + are dropped. + + If *only_new*, limit to paths that have not been processed at all, + irrespective of the value of *before*. + """ + if only_new: + sql = ( + "SELECT * FROM site_path" + " WHERE site_id=$1 AND last_visit is null LIMIT 1" + ) # implicitly canonical=null + row = await conn.fetchrow(sql, site.id_) + else: + sql = ( + "SELECT * FROM site_path" + " WHERE site_id=$1 AND canonical IS NOT false AND" + " (last_visit is null OR last_visit<$2) AND" + " ok_count > -3 LIMIT 1" + ) # canonical can be true or null + row = await conn.fetchrow(sql, site.id_, before) + if row: + return await SitePath().load_from_row(row) + return None + + +async def process_site_path( + app, + worker_number: int, + conn: Connection, + fetcher: ResourceFetcher, + tf: TensorFlow, + site: Site, + site_path: SitePath, +) -> bool: + """ + Fetch a path, deduplicate and if canonical, update and index the resource. + + Return whether a new resource was handled that should contribute be + statistics. 
+ """ + msg = ( + f'Worker {worker_number} processing site {site.id_}' + f' site_path {site_path.id_} {site.base_url}{site_path.path}' + ) + logger.debug(msg) + if not site.id_: # only to satisfy typing + return False + + # fetch url + site_path.last_visit = datetime.utcnow() + url = site_path.url(site) + resource = await fetcher.fetch(url, site=site) + + # handle failure (possibly deleting old information) + if not isinstance(resource, (TextResource, MetaResource)): + if not resource: # irrelevant content-type + site_path.ok_count = -10 + elif isinstance(resource, ResourceError): + site_path.ok_count -= 1 + if site_path.ok_count <= -3 and site_path.resource_id: + await site_path.unlink_resource( + conn, + app.search_engine, + app.config['elasticsearch']['index_base_name'], + ) + await site_path.save(conn) + if resource: # relevant content-type + msg = ( + f'Worker {worker_number} failed to process site_path' + f' {site_path.id_} (site {site.id_},' + f' {site.base_url}{site_path.path})' + ) + logger.info(msg) + return False + + # handle MetaResources + if isinstance(resource, MetaResource): + if isinstance(resource, Feed): + resource.site_id = site.id_ + await resource.save(conn) + if resource.entries: + await store_feed_entries(conn, site, resource.entries) + elif isinstance(resource, Sitemap): + paths, _ = extract_sitemap_paths(site.base_url, resource.urls) + await add_site_paths(conn, site.id_, paths) + elif isinstance(resource, SitemapIndex): + for sitemap_dict in resource.sitemaps: + url = sitemap_dict['loc'] + res_sitemap = await fetcher.fetch(url, site=site) + if isinstance(res_sitemap, Sitemap): + paths, _ = extract_sitemap_paths( + site.base_url, res_sitemap.urls + ) + await add_site_paths(conn, site.id_, paths) + return False + + # handle TextResource + relevant, is_new_resource = await _handle_text_resource( + app, conn, tf, site, site_path, resource, url + ) + if not relevant: + return False + site_path.resource_id = resource.id_ + site_path.canonical = resource.init_fields.get('canonical') + site_path.ok_count += 1 + await site_path.save(conn) + + if shortlink_url := resource.init_fields.get('shortlink'): + await _save_shortlink( + conn, site, url, resource, shortlink_url, site_path.last_visit + ) + + return is_new_resource + + +async def _handle_text_resource( + app, conn, tf, site, site_path, resource, url +) -> tuple[bool, bool]: + """ + Ingest a text resource. + + Return whether the resource is relevant and whether it is new. 
+ """ + # save the resource's internal links + paths = [] + if links_int := resource.init_fields['links_int']: + for durl, (rel, _) in links_int.items(): + rp_filter = app.plugins['filter_resource_path'].rp_filter + if path := rp_filter(site, durl): + canon = (rel and rel.lower() == 'canonical') or None + paths.append((path, canon)) + await add_site_paths(conn, site.id_, paths) + + # find resources similar to the current text + text = resource.search_fields['text'] + if len(text) < 300: # discard resources with too short texts + site_path.resource_id = None + await site_path.save(conn) + return False, False + simhash = simhash_from_bigint(resource.simhash) + index = site.simhash_index + similar_ids = search_simhash(index, simhash) + + # determine the destination resource and resources to be merged into it + old_id = site_path.resource_id + if ( + old_id + and old_id in similar_ids + and ( # similar to old text + dest_resource := await TextResource().load(conn, old_id) + ) + ): + merge_ids = list(filter(lambda elem: elem != old_id, similar_ids)) + else: # no old text, or old text not similar any more + if old_id: + await site_path.unlink_resource( + conn, + app.search_engine, + app.config['elasticsearch']['index_base_name'], + ) + # find the first existing similar resource + for similar_id in similar_ids: + dest_resource = await TextResource().load(conn, similar_id) + if dest_resource: + # also require similar length + l1 = len(resource.search_fields['text']) + l2 = dest_resource.text_len + if 0.95 * l2 <= l1 <= 1.05 * l2: + merge_ids = list( + filter(lambda elem: elem != similar_id, similar_ids) + ) + break + else: + dest_resource = None + merge_ids = [] + + # update or create the destination resource + if dest_resource: + is_new_resource = False + resource.simhash = create_simhash(index, dest_resource.id_, simhash) + await dest_resource.update_from_resource(resource) + resource = dest_resource + else: + is_new_resource = True + resource.simhash = simhash_to_bigint(simhash) + await resource.save(conn) + create_simhash(index, resource.id_, simhash) + + # add resource to search index + if resource.content_type in ('html', 'plain'): + await index_resource( + app.search_engine, + tf, + site_path, + resource, + site.base_url, + url, + ) + + # merge resources: merge_ids -> resource + for merge_id in merge_ids: + # replace links to the merge resource with links to the dest resource + sql = "UPDATE site_path SET resource_id=$1 WHERE resource_id=$2" + await conn.execute(sql, resource.id_ or None, merge_id) + # remove orphaned merge resource + sql = "DELETE FROM resource WHERE id=$1 RETURNING (true, lang)" + found = await conn.fetchval(sql, merge_id) + if found: + await delete_resource( + app.search_engine, + found[1], + merge_id, + ) + + return True, is_new_resource + + +async def _save_shortlink( + conn, site, url, resource, shortlink_url, last_visit +): + """ + Save a shortlink. 
+ """ + shortlink_durl = await Durl(shortlink_url, base=site.base_url) + if shortlink_durl and shortlink_url != url: + sql = "SELECT * FROM site_path WHERE site_id=$1 AND path=$2" + sl_path = shortlink_durl.pwa() + row = await conn.fetchrow(sql, site.id_, sl_path) + shortlink = await SitePath().load_from_row(row) + if not shortlink: + shortlink = SitePath( + site_id=site.id_, + path=sl_path, + last_visit=last_visit, + ok_count=1, + canonical=False, + resource_id=resource.id_, + ) + else: + shortlink.last_visit = last_visit + shortlink.ok_count += 1 + shortlink.canonical = False + shortlink.resource_id = resource.id_ + await shortlink.save(conn) diff --git a/src/atextcrawler/resource/page.py b/src/atextcrawler/resource/page.py new file mode 100644 index 0000000..540a023 --- /dev/null +++ b/src/atextcrawler/resource/page.py @@ -0,0 +1,355 @@ +""" +Parse HTML pages. +""" + +import logging +from copy import deepcopy +from typing import Optional, Union + +from bs4 import BeautifulSoup +from tidylib import tidy_document + +from ..models import ResourceError, ResourceRedirect, Site, TextResource +from ..utils.annotation import ( + annotate, + annotations_remove_section, + clean_annotations, + get_tag_counts, + headline_probability, +) +from ..utils.date_finder import extract_latest_date +from ..utils.durl import Durl, assort_links +from ..utils.html import ( + clean_body, + clean_page, + extract_title, + get_html_lang, + get_html_redirect, +) +from ..utils.http import get_header_links +from ..utils.lang import extract_content_language +from ..utils.section import iter_sections +from ..utils.tag import keep_tags + +logger = logging.getLogger(__name__) +logger_debug = logging.getLogger(__name__ + '.debug') +logger_debug.setLevel(logging.INFO) +logger_links = logging.getLogger(__name__ + '.debug.links') +logger_stats = logging.getLogger(__name__ + '.debug.stats') +logger_sections = logging.getLogger(__name__ + '.debug.sections') + + +async def parse_html( + durl: Durl, + resp: dict, + site: Optional[Site], +) -> Optional[Union[TextResource, ResourceError, ResourceRedirect]]: + """ + Extract relevant data from a response returning a TextResource instance. + + The given URL must be the full URL (incl. scheme and netloc) of the page. 
+ """ + html = resp['content'] + + # follow link to canonical URL + header_links = await get_header_links(resp['headers'], durl, site) + if canonical := header_links.get('canonical'): + if canonical != durl.url(): + return ResourceRedirect(resp['redirects'] + [canonical]) + + # follow html redirect, if present + if redir_url := get_html_redirect(html): + if redir_url not in resp['redirects']: + return ResourceRedirect(resp['redirects'] + [redir_url]) + else: + msg = f'Cyclic HTML redirect: {redir_url} in {resp["redirects"]}' + return ResourceError(msg) + + # require html tag + if not html[:14].lower().startswith(' 0.5: + w = -n_links + elif link_density > 0.3 and avg_text_len < 60: + w = -3 + else: + n_li, li_density, li_len = get_tag_counts( + ('li',), i, f, tags, text + ) + if link_density > 0.2 and li_density > 0.8 and li_len < 50: + w = -3 + if 52 <= lvl < 60: + w = max(w, 1.0) + if 'sidebar' in ' '.join(section_ids.get(i, [])): + w = -3 + if len(txt) < 20 and ('RSS' in txt or 'MENU' in txt): + w = -3 + # special chars + if txt.startswith('←') or txt.endswith('→'): # wordpress navigation + w = -3 + # remove boilerplate texts + if boilerplate_texts and txt in boilerplate_texts: + w = -10 + sections_keep[(i, f)] = w, lvl + + # amend keep scores: look at preceding / subsequent sections with + # equal level and transfer their keep scores to the current section + n = len(sections_keep) + sections = list(sorted(sections_keep.keys())) + # inspect subsequent sections: + for rev_ind, s_range in enumerate(reversed(sections)): + ind = n - 1 - rev_ind + w, lvl = sections_keep[s_range] + if abs(w) <= 2: + w_sum = 0 + n_peers = 0 + for i in range(ind + 1, min(n, ind + 15)): + w_, lvl_ = sections_keep[sections[i]] + if lvl_ != lvl: + break + n_peers += 1 + w_sum += w_ + if n_peers >= 3: + sections_keep[s_range] = w + 2 * w_sum / n_peers, lvl + # inspect preceding sections: + for ind, s_range in enumerate(sections): + w, lvl = sections_keep[s_range] + if abs(w) <= 2: + w_sum = 0 + n_peers = 0 + for i in range(ind - 1, max(0, ind - 15), -1): + w_, lvl_ = sections_keep[sections[i]] + if lvl_ != lvl: + break + n_peers += 1 + w_sum += w_ + if n_peers >= 3: + sections_keep[s_range] = w + 2 * w_sum / n_peers, lvl + + # amend keep scores: look at sections that could be headlines + # for subsequent kept sections and increase their score; + # also allow for up to 2 sections inbetween (which will also + # have their score increased) + for rev_ind, s_range in enumerate(reversed(sections)): + ind = n - 1 - rev_ind + w, lvl = sections_keep[s_range] + if abs(w) <= 2: + if headline_probs.get(s_range, 0) > 0.49: + # look at subsequent sections with higher level + child_weights = [] + for i in range(ind + 1, n): + w_, lvl_ = sections_keep[sections[i]] + if lvl_ <= lvl or w_ < -2: + break + child_weights.append(w_) + if nc := len(child_weights): + child_avg = sum(child_weights) / nc + if w + 1.2 * child_avg > 2: + sections_keep[s_range] = w + 1.2 * child_avg, lvl + if nc > 1: + if (w1 := child_weights[0]) <= 2: + sections_keep[sections[ind + 1]] = ( + w1 + 1.5 * child_avg, + lvl, + ) + if nc > 2: + if (w2 := child_weights[1]) <= 2: + sections_keep[sections[ind + 2]] = ( + w2 + 2 * child_avg, + lvl, + ) + + # clean annotations + clean_annotations(annotations) + + # debug sections + if logger_sections.isEnabledFor(logging.DEBUG): + logger_sections.debug('============= Weighted sections =============') + for i, f, lvl, txt in iter_sections(text, sb, max_level=60): + w, lvl = sections_keep[(i, f)] + indent = ('+' if w > 2 
else '-') * lvl + ts = ','.join(tags[(i + 1, f)]) + logger_sections.debug(f'{indent} {i} {f} {ts} {txt} {w:.2f}') + + # narrow down annotations and text to keep_sections + # drop undecided sections + filtered_text = text + filtered_ann = deepcopy(annotations) + for i, f in sorted(sections_keep.keys(), reverse=True): + w, lvl = sections_keep[(i, f)] + if w <= 2.0: + filtered_ann = annotations_remove_section(filtered_ann, i, f) + filtered_text = filtered_text[:i] + filtered_text[f:] + clean_annotations(filtered_ann) + + # debug filtered sections + if logger_sections.isEnabledFor(logging.DEBUG): + logger_sections.debug('') + logger_sections.debug('============= Filtered sections =============') + fsb = filtered_ann['semantic_breaks'] + ftags = filtered_ann['tags'] + for i, f, lvl, txt in iter_sections(filtered_text, fsb, max_level=100): + indent = ' ' * lvl + ts = ','.join(ftags.get((i + 1, f), [])) + logger_sections.debug(f'{indent} {lvl} {i} {f} {ts} {txt}') + + return filtered_text, filtered_ann diff --git a/src/atextcrawler/resource/plaintext.py b/src/atextcrawler/resource/plaintext.py new file mode 100644 index 0000000..1514508 --- /dev/null +++ b/src/atextcrawler/resource/plaintext.py @@ -0,0 +1,148 @@ +""" +Parse plaintext pages. +""" + +import logging +import re +from typing import Any, Optional, Union + +import pypandoc + +from ..models import ResourceError, ResourceRedirect, Site, TextResource +from ..utils.annotation import annotate +from ..utils.date_finder import extract_latest_date +from ..utils.durl import Durl +from ..utils.http import get_header_links +from ..utils.lang import extract_content_language +from ..utils.muse import parse_muse + +logger = logging.getLogger(__name__) + + +MAX_LINK_TEXT_LENGTH = 100 +""" +Maximum length of a link's text to be kept. + +Cf. table site_link, column link_text. +""" + + +re_url = re.compile( + r'((http|https)://[\w_-]+\.[\w_-]+(:[0-9]+)?' + r'([\w.,@?^=%&:/~+#-]*[\w@?^=%&/~+#-])?)' +) + + +re_nl = re.compile(r'\r\n') + + +re_ws = re.compile(r'\s*\n\s*\n\s*') + + +re_nn = re.compile(r'\n\n') + + +async def parse_plaintext( + durl: Durl, + resp: dict, + site: Optional[Site], +) -> Optional[Union[ResourceRedirect, TextResource]]: + """ + Extract relevant data from a response returning a TextResource instance. + + The given URL must be the full URL (incl. scheme and netloc) of the page. 
+ """ + text = resp['content'] + + # HTTP headers, canonical URL, shortlink + header_links = await get_header_links(resp['headers'], durl, site) + if canonical := header_links.get('canonical'): + if canonical != durl.url(): + return ResourceRedirect(resp['redirects'] + [canonical]) + shortlink = header_links.get('shortlink') + + if not text: + return None + + text = re_nl.sub('\n', text) + text = re_ws.sub('\n\n', text) + + # meta info + meta: dict[str, Any] = {} + muse = None + if durl.path.endswith('.muse'): + muse = parse_muse(text) + if muse: + meta, text = muse + # title + if not meta.get('title'): + meta['title'] = text[:200].splitlines()[0] + # content language + if not meta.get('lang'): + meta['lang'] = extract_content_language(text) + # publication date + if not meta.get('pub_date'): + meta['pub_date'] = extract_latest_date(text, lang=meta.get('lang')) + + # links + links_int: dict[Durl, tuple[list[str], str]] = {} + links_ext: dict[Durl, tuple[list[str], str]] = {} + for url in re_url.findall(text): + link_durl = await Durl(url[0]) + if link_durl: + if link_durl.site() == durl.site(): + links_int[link_durl] = [], link_durl.url() + else: + links_ext[link_durl] = [], link_durl.url() + + if muse: + html = pypandoc.convert_text(text, 'html5', format='muse').strip() + text, annotations = annotate(html) + else: + text, annotations = annotate_text(text) + + return TextResource( + content_type=resp['parser'], + last_change=meta.get('pub_date'), + text_len=len(text), + lang=meta.get('lang'), + title=meta.get('title'), + init_fields={ + 'durl': durl, + 'site': site, + 'headers': resp['headers'], + 'redirects': resp['redirects'], + 'links_int': links_int, + 'links_ext': links_ext, + 'shortlink': shortlink, + 'canonical': None, + }, + search_fields={ + 'title': meta.get('title'), + 'authors': meta.get('authors'), + 'pub_date': meta.get('pub_date'), + 'keywords': meta.get('keywords'), + 'summary': meta.get('summary'), + 'text': text, + 'annotations': annotations, + }, + ) + + +def annotate_text(text): + """ + Return annoations as :func:`utils.annotation.annotate`does. + + Here we only have information on semantic breaks + (in plaintext they are where empty lines are). + """ + semantic_breaks = {} + for match in re_nn.finditer(text): + semantic_breaks[match.span()[0]] = '' + annotations = { + 'tags': {}, + 'semantic_breaks': semantic_breaks, + 'section_ids': {}, + 'links': {}, + } + return text, annotations diff --git a/src/atextcrawler/resource/sitemap.py b/src/atextcrawler/resource/sitemap.py new file mode 100644 index 0000000..e1b06aa --- /dev/null +++ b/src/atextcrawler/resource/sitemap.py @@ -0,0 +1,149 @@ +""" +Sitemap and SitemapIndex and related operations. +""" + +import logging +from datetime import datetime +from typing import Optional + +import pytz + +from ..models import Sitemap, SitemapIndex, TextResource + +logger = logging.getLogger(__name__) + + +async def get_sitemap_urls( + fetcher, + base_url: Optional[str], + sitemaps=None, +) -> list[dict]: + """ + Try to find sitemaps and fetch and return their URL content. + + Each sitemapped URL is a dict with key 'loc' and optional key 'lastmod'. 
+ """ + if sitemaps: + # test example: https://www.berlin.de/ + check_all = True + elif base_url: + sitemaps = [ + base_url.rstrip('/') + '/sitemap.xml', + base_url.rstrip('/') + '/wp-sitemap.xml', + base_url.rstrip('/') + '/sitemap_index.xml', + base_url.rstrip('/') + '/sitemap.xml.gz', + base_url.rstrip('/') + '/sitemap_index.xml.gz', + base_url.rstrip('/') + '/sitemap.txt', + base_url.rstrip('/') + '/sitemap/', + base_url.rstrip('/') + '/sitemap1.xml', + base_url.rstrip('/') + '/sitemap-index.xml', + base_url.rstrip('/') + '/sitemapindex.xml', + base_url.rstrip('/') + '/sitemap/index.xml', + ] + check_all = False + else: + return [] + urls = [] + for sitemap in sitemaps: + resource = await fetcher.fetch(sitemap) + found = True + if isinstance(resource, SitemapIndex): + for sitemap_ in resource.sitemaps: + sitemaps.append(sitemap_['loc']) + elif isinstance(resource, Sitemap): + urls += resource.urls + elif isinstance(resource, TextResource) and resource.content_type in ( + 'html', + 'plain', + ): + urls += [ + {'loc': durl.url()} + for durl in resource.init_fields['links_int'] + ] + else: + found = False + if found and not check_all: + break + return urls + + +def parse_sitemapindex(sitemapindex): + """ + Parse a sitemap index returning a `SitemapIndex` with found sitemaps. + """ + sitemaps = [] + for tag in sitemapindex.find_all('sitemap'): + if loc := tag.find('loc'): + if loc.string: + sitemap = {'loc': loc.string.strip()} + if lastmod := tag.find('lastmod'): + try: + t = datetime.fromisoformat(lastmod.string.strip()) + sitemap['lastmod'] = t + except: + pass + sitemaps.append(sitemap) + return SitemapIndex(sitemaps=sitemaps) + + +def parse_sitemap(urlset) -> Sitemap: + """ + Return a list of sitemap URLs. + + Each URL is a dict with these keys+values: + + * loc: the full URL of a mapped resource + * lastmod: optional datetime of its last modification + * changefreq: optional info on the change frequency to be expected + * priority: optional info on its priority relative to other resources + + Cf. https://www.sitemaps.org/protocol.html + """ + urls = [] + for tag in urlset.find_all('url'): + if loc := tag.find('loc'): + if loc.string: + url = {'loc': loc.string.strip()} + if lastmod := tag.find('lastmod'): + try: + t = lastmod.string.strip().rstrip('Z') + url['lastmod'] = ( + datetime.fromisoformat(t) + .astimezone(pytz.utc) + .replace(tzinfo=None) + ) + except: + pass + if changefreq := tag.find('changefreq'): + url['changefreq'] = changefreq.string.strip() + if priority := tag.find('priority'): + url['priority'] = priority.string.strip() + urls.append(url) + return Sitemap(urls=urls) + + +def extract_sitemap_paths( + base_url: Optional[str], + urls: list[dict], +) -> tuple[list[tuple[str, bool]], Optional[datetime]]: + """ + Extract essential information from sitemap URLs. + + Return a list of relative paths of the site's resources + (in a form to be easily fed into `add_site_paths`) and + the datetime of the latest change. + + Relative paths are computed using base_url. 
+ """ + paths = [] + latest = None + for url in urls: + loc = url['loc'] + lastmod = url.get('lastmod') + if loc.startswith(base_url or ''): + path = loc.removeprefix(base_url or '').lstrip('/') + path = path.split('#', 1)[0] + paths.append((path, True)) + if lastmod: + latest = max(lastmod, latest or lastmod) + return paths, latest diff --git a/src/atextcrawler/search/__init__.py b/src/atextcrawler/search/__init__.py new file mode 100644 index 0000000..b9a3bba --- /dev/null +++ b/src/atextcrawler/search/__init__.py @@ -0,0 +1,6 @@ +from .engine import ( + delete_resource, + index_resource, + shutdown_engine, + startup_engine, +) diff --git a/src/atextcrawler/search/engine.py b/src/atextcrawler/search/engine.py new file mode 100644 index 0000000..7a72de6 --- /dev/null +++ b/src/atextcrawler/search/engine.py @@ -0,0 +1,270 @@ +""" +Search engine, for now elasticsearch. + +We have one index per supported language and a default one. +""" + +import logging +import warnings +from difflib import SequenceMatcher +from typing import Union + +from elasticsearch import AsyncElasticsearch +from elasticsearch.exceptions import NotFoundError + +from ..utils.annotation import pack_annotations +from ..utils.section import concat_section_texts + +logger = logging.getLogger(__name__) + + +warnings.filterwarnings( + 'ignore', + 'The client is unable to verify that the' + ' server is Elasticsearch due security privileges on the server side', +) + + +MIN_INDEXING_TIMEOUT_SECONDS = 5 + + +language_analyzers = { + 'en': 'english', + 'de': 'german', + #'fr': 'french', + #'el': 'greek', + #'es': 'spanish', + 'default': 'standard', +} + + +properties = { + 'resource_id': {'type': 'long'}, + 'site_id': {'type': 'long'}, + 'url': {'type': 'text'}, + 'base_url': {'type': 'text'}, + 'pub_date': {'type': 'date', 'format': 'yyyy-MM-dd||yyyy-MM||yyyy'}, + 'lang': {'type': 'keyword'}, + 'title': {'type': 'text'}, + 'authors': {'type': 'text'}, + 'summary': {'type': 'text'}, + 'keywords': {'type': 'text'}, + 'collections': {'type': 'keyword'}, + 'time_horizon': {'type': 'keyword'}, + 'orig_source': {'type': 'text'}, + 'topics': {'type': 'text'}, + 'annotations': {'type': 'text', 'index': False}, + 'sections': { + 'type': 'nested', + 'properties': { + 'start_ids': {'type': 'integer'}, + 'end_ids': {'type': 'integer'}, + 'text': {'type': 'text', 'index_options': 'offsets'}, + 'embedding': {'type': 'dense_vector', 'dims': 512}, + }, + }, +} + + +async def startup_engine(config): + """ + Open the search engine for access. + """ + engine = AsyncElasticsearch( + host=config['elasticsearch']['host'], + api_key=( + config['elasticsearch']['id'], + config['elasticsearch']['api_key'], + ), + use_ssl=False, + timeout=20, + ) + engine.index_base_name = config['elasticsearch']['index_base_name'] + await create_indices(engine) + await open_indices(engine) + return engine + + +async def create_indices(engine): + """ + Create indices for all configured langiages. 
+ """ + for lang, analyzer in language_analyzers.items(): + index_name = engine.index_base_name + '_text_' + lang + if not await engine.indices.exists(index=index_name): + await engine.indices.create(index=index_name) + await engine.indices.close(index=index_name) + await engine.indices.put_settings( + index=index_name, + body={ + 'analysis': {'analyzer': {'default': {'type': analyzer}}}, + 'refresh_interval': '60s', + }, + ) + await engine.indices.put_mapping( + index=index_name, + body={'properties': properties}, + ) + + +async def open_indices(engine): + """ + Open indices for all configure languages. + """ + for lang in language_analyzers.keys(): + index_name = engine.index_base_name + '_text_' + lang + await engine.indices.open(index=index_name) + + +async def shutdown_engine(engine): + """ + Close the connection to the search engine. + """ + # await close_indices(engine) + await engine.close() + + +async def close_indices(engine): + """ + Close indices. UNUSED. + """ + for lang in language_analyzers.keys(): + index_name = engine.index_base_name + '_text_' + lang + await engine.indices.close(index=index_name) + + +async def index_resource( + engine, + tf, + site_path, + resource, + base_url, + url, +): + """ + Index a resource. + """ + lang = resource.lang + index_lang = lang if lang in language_analyzers.keys() else 'default' + index_name = engine.index_base_name + '_text_' + index_lang + pub_date = resource.search_fields.get('pub_date') + if pub_date: + pub_date = str(pub_date.date()) + text = resource.search_fields.get('text') + annotations = resource.search_fields.get('annotations') + semantic_breaks = annotations['semantic_breaks'] + sections = [] + for section_ids, txt in concat_section_texts(text, semantic_breaks): + embedding = await tf.embed(txt) + sections.append( + { + 'start_ids': section_ids[0], + 'end_ids': section_ids[-1], + 'text': txt, + 'embedding': embedding, + } + ) + doc = { + 'resource_id': resource.id_, + 'site_id': site_path.site_id, + 'url': url, + 'base_url': base_url, + 'pub_date': pub_date, + 'lang': resource.lang, + 'title': resource.search_fields.get('title'), + 'authors': resource.search_fields.get('authors'), + 'summary': resource.search_fields.get('summary'), + 'keywords': resource.search_fields.get('keywords'), + 'collections': resource.search_fields.get('collections'), + 'time_horizon': resource.search_fields.get('time_horizon'), + 'orig_source': resource.search_fields.get('orig_source'), + 'topics': resource.search_fields.get('topics'), + 'annotations': pack_annotations(annotations), + 'sections': sections, + } + timeout_seconds = max(MIN_INDEXING_TIMEOUT_SECONDS, int(len(text) / 1000)) + await engine.index( + id=resource.id_, + index=index_name, + body=doc, + timeout=f'{timeout_seconds}s', + ) + + +async def delete_resource(engine, lang, resource_id): + """ + Delete a resource. + """ + index_name = engine.index_base_name + '_text_' + (lang or 'default') + try: + await engine.delete(index_name, resource_id) + except NotFoundError: + msg = f'Cannot delete resource from index, not found: {resource_id}' + logger.warning(msg) + + +async def find_duplicate(engine, site_id, resource) -> Union[bool, None, int]: + """ + UNUSED. + + Try to find a duplicate resource with matching site. + + If the search backend query fails, return False. + If no matching resource was found, return None. + If a matching resource was found, return its id. 
+ """ + # get sample texts + text = resource.search_fields['text'] + if not text or len(text) < 100: + return None + # annotations = resource.search_fields['annotations'] + # semantic_breaks = annotations['semantic_breaks'] + # texts = [] + # for _, txt in concat_section_texts(text, semantic_breaks): + # texts.append(txt) + # texts = extract_samples(texts) + + # # search for sample texts + # text_count = len(texts) + # should_min = max(1, int(0.6 * text_count)) + # should = [] + # for text in texts: + # should.append({'match': {'sections.text': text}}) + query = { + 'bool': { + 'must': { + 'nested': { + 'path': 'sections', + 'query': {'match': {'sections.text': text}}, + }, + }, + 'filter': { + 'term': { + 'site_id': site_id, + }, + }, + } + } + fields = [ + 'url', + 'sections.text', + 'site_id', + ] + response = await engine.search( + index=engine.index_base_name + '_text_*', + body={ + 'query': query, + 'fields': fields, + 'from': 0, + 'size': 3, + '_source': False, + }, + ) + if response['timed_out']: + return False + for hit in response.get('hits', {}).get('hits'): + txt = ' '.join(hit['fields']['sections.text']) + similarity = SequenceMatcher(None, text, txt).ratio() + if similarity > 0.99: + return hit['_id'] + return None diff --git a/src/atextcrawler/site/__init__.py b/src/atextcrawler/site/__init__.py new file mode 100644 index 0000000..e2dacd9 --- /dev/null +++ b/src/atextcrawler/site/__init__.py @@ -0,0 +1,9 @@ +""" +Websites. +""" + +from .feeds import fetch_feeds +from .operations import checkin_site, checkout_site, process_site, update_site +from .queue import process_site_queue +from .robots import RobotsInfo +from .seed import load_seeds diff --git a/src/atextcrawler/site/__main__.py b/src/atextcrawler/site/__main__.py new file mode 100644 index 0000000..b40afc1 --- /dev/null +++ b/src/atextcrawler/site/__main__.py @@ -0,0 +1,68 @@ +""" +Tool for analyzing a website. + +Fetch the startpage and output information to console. +Do not change any persistent data. +""" + +import asyncio +import logging +import sys + +import aiohttp + +from ..models import TextResource +from ..resource import ResourceFetcher, extract_sitemap_paths, get_sitemap_urls +from ..site.robots import RobotsInfo +from ..utils.durl import Durl +from .parse import parse_startpage + +logger = logging.getLogger() +logger.setLevel(logging.WARNING) +logger.addHandler(logging.StreamHandler()) + + +async def run(): + """ + Fetch the startpage of a website and show information about it. + + The URL must be given as commandline argument. 
+ """ + base_url = sys.argv[1] + async with aiohttp.ClientSession() as session: + if not (base_durl := await Durl(base_url)): + return + fetcher = ResourceFetcher(session) + resource = await fetcher.fetch(base_url) + logger.warning(repr(resource)) + if ( + isinstance(resource, TextResource) + and resource.content_type == 'html' + ): + site = await parse_startpage(resource) + # site.crawl_enabled = await site_filter(site) + logger.warning(repr(site)) + logger.warning('') + for durl, text in site.links_ext.items(): + logger.warning(f' {durl} {text}') + logger.warning(f'{durl.url()} -------- {text}') + logger.warning('') + logger.warning(f'Redirects: {resource.init_fields["redirects"]}') + logger.warning('') + robots = await RobotsInfo(base_url) + urls = await get_sitemap_urls( + fetcher, base_url, sitemaps=robots.site_maps + ) + paths, latest = extract_sitemap_paths(base_url, urls) + for path in paths: + logger.warning(path) + logger.warning(f'Feeds: {site.feeds}') + logger.warning(latest) + # sample_links = extract_samples(resource.init_fields['links_int']) + # logger.warning(f'************* {sample_links}') + else: + logger.warning('(No text resource or error.)') + + +if __name__ == '__main__': + asyncio.run(run()) diff --git a/src/atextcrawler/site/feeds.py b/src/atextcrawler/site/feeds.py new file mode 100644 index 0000000..da4fbda --- /dev/null +++ b/src/atextcrawler/site/feeds.py @@ -0,0 +1,100 @@ +""" +High-level feed-related stuff. + +See resource.feed for low-level stuff not primarily related to sites. +""" + +from datetime import datetime +from typing import Optional + +from ..models import Feed +from ..resource import store_feed_entries, update_feed + + +async def store_new_feeds(conn, site_id, feeds: dict): + """ + Store new feeds in table site_feed. + """ + sql = "SELECT array_agg(url) FROM site_feed WHERE site_id=$1" + known_feeds = (await conn.fetchval(sql, site_id)) or [] + for feed_url in feeds.keys(): + if feed_url not in known_feeds: + feed = Feed( + site_id=site_id, + url=feed_url, + ) + await feed.save(conn) + + +async def get_feeds(conn, site_id) -> list[Feed]: + """ + Return stored feeds for the given site. + """ + sql = "SELECT * FROM site_feed WHERE site_id=$1" + rows = (await conn.fetch(sql, site_id)) or [] + return [(await Feed().load_from_row(row)) for row in rows] + + +async def fetch_feeds(fetcher, conn, site) -> Optional[datetime]: + """ + Fetch feeds, add new resources and return the latest content update time. + """ + feeds = await get_feeds(conn, site.id_) + latest = None + for feed in feeds: + feed_content = await update_feed(fetcher, feed, conn) + if feed_content: + await store_feed_entries(conn, site, feed_content) + if feed.t_content: + latest = max(latest or feed.t_content, feed.t_content) + return latest + + +if __name__ == '__main__': + # only use this on a dev instance! + import asyncio + import logging + import sys + + import aiohttp + + from ..config import Config + from ..db import PGPool + from ..resource.fetch import ResourceFetcher + from .operations import process_site, update_site + + logger = logging.getLogger() + logger.setLevel(logging.DEBUG) + config = Config().get() + url = sys.argv[1] + + async def run(): + """ + Fetch and display a site. 
+ """ + app = None # TODO + async with PGPool(config['postgresql']) as pool: + async with pool.acquire() as conn: + async with aiohttp.ClientSession() as session: + fetcher = ResourceFetcher(session) + site, _ = await update_site(app, fetcher, conn, url) + logger.warning(site) + await process_site(fetcher, conn, site) + latest = await fetch_feeds(fetcher, conn, site) + logger.warning(f'latest: {latest}') + # feed = Feed(url=url) + # feed_content = await update_feed(fetcher, feed, conn) + # if isinstance(feed_content, ResourceError): + # print(feed_content) + # else: + # print(feed) + # pprint(feed_content[0]) + # print('---- 2nd try ----') + # feed_content = await update_feed(fetcher, feed, conn) + # if isinstance(feed_content, ResourceError): + # print(feed_content) + # else: + # print(feed) + # pprint(feed_content[0]) + + asyncio.run(run()) diff --git a/src/atextcrawler/site/operations.py b/src/atextcrawler/site/operations.py new file mode 100644 index 0000000..36689c5 --- /dev/null +++ b/src/atextcrawler/site/operations.py @@ -0,0 +1,267 @@ +""" +Operations on sites. +""" + +import logging +from datetime import datetime, timedelta +from typing import Optional + +from asyncpg import Connection + +from ..models import Crawl, Site, TextResource +from ..resource import ( + add_site_paths, + extract_sitemap_paths, + get_sitemap_urls, + store_boilerplate_texts, +) +from ..utils.durl import Durl +from ..utils.similarity import get_simhash_index +from .feeds import fetch_feeds, store_new_feeds +from .parse import parse_startpage +from .robots import RobotsInfo + +logger = logging.getLogger(__name__) + + +async def checkout_site( + app, conn: Connection +) -> tuple[Optional[int], bool, bool]: + """ + Get the id of a site to be crawled and mark it with crawl_active=true. + + Also return whether the site shall be fully crawled; if not, this + means that just the resources from the feeds shall be crawled. + + Also return whether more sites might be available. + """ + async with conn.transaction(): + sql = ( + "SELECT id, next_full_crawl < now() at time zone 'UTC' is_full" + " FROM site WHERE crawl_enabled AND crawl_active = false" + " AND (next_full_crawl < now() at time zone 'UTC'" + " OR next_feed_crawl < now() at time zone 'UTC')" + " LIMIT 1 FOR UPDATE SKIP LOCKED" + ) + row = await conn.fetchrow(sql) + if row: + site_id = row['id'] + is_full = row['is_full'] + sql = "UPDATE site SET crawl_active = true WHERE id=$1" + await conn.execute(sql, site_id) + site = await Site().load(conn, site_id) + if site: + site.base_durl = await Durl(site.base_url) + if site.base_durl: + site.simhash_index = await get_simhash_index(conn, site_id) + return site, is_full, True + else: + # site not available; schedule next crawl + int_full = app.config['crawl']['full_crawl_interval'] + int_feed = app.config['crawl']['feed_crawl_interval'] + now = datetime.utcnow() + t_full = now + timedelta(seconds=int_full) + t_feed = now + timedelta(seconds=int_full + int_feed) + sql = ( + "UPDATE site SET crawl_active=false," + " next_full_crawl=$1, next_feed_crawl=$2" + " WHERE id=$3" + ) + await conn.execute(sql, t_full, t_feed, site_id) + return None, False, True + return None, False, True + return None, False, False + + +async def update_site( + app, fetcher, conn: Connection, base_url, site: Site = None +) -> tuple[Optional[Site], bool]: + """ + Try to fetch base_url and return a site and whether a new one was created. + + This function is run for all sites (including blacklisted and irrelevant + ones). 
It determines whether the site shall be crawled. + + If an errors occurs, return (None, False), and if a site was given, + also set it to crawl_enabled=False and remove crawling schedules. + + If base_url could be fetched, update the site, possibly creating + a new one. + + If the site has crawl_enabled, and no full crawl is scheduled, + schedule one (by updating column `next_full_crawl`). + """ + # fetch startpage + logger.info(f'Updating site={site}, base_url={base_url}') + resource = await fetcher.fetch(base_url, site=site) + if ( + not isinstance(resource, TextResource) + or resource.content_type != 'html' + ): + if site: + site.meta_info['error'] = 'Invalid start page' + site.crawl_enabled = False + site.next_full_crawl = None + site.next_feed_crawl = None + await site.save(conn) + logger.info(f'Failed startpage {base_url}: {resource}') + return None, False + + # parse startpage (extract site information) and save the site + site = await parse_startpage(resource, app=app, site=site) + site_id, created = await site.save(conn) + if created: + logger.debug(f'Created {site}') + + # add black-/white-listing info + is_allowed = await is_site_allowed(conn, site.id_, base_url) + if is_allowed is not None and is_allowed != site.crawl_enabled: + site.crawl_enabled = is_allowed + await site.save(conn) + + # schedule full crawl, if none is scheduled and the site shall be crawled + if site.crawl_enabled: + sql = ( + "UPDATE site" + " SET next_full_crawl=now() at time zone 'UTC'" + " WHERE id=$1 AND next_full_crawl IS null" + ) + await conn.execute(sql, site_id) + + return site, created + + +async def is_site_allowed( + conn: Connection, + site_id: Optional[int], + base_url: str, +) -> Optional[bool]: + """ + Return True if the site is whitelisted, False if blacklisted, else None. + + Also add missing site_ids to the annotations. + """ + sql = "SELECT * FROM site_annotation WHERE site_id=$1 OR base_url=$2" + anns = await conn.fetch(sql, site_id, base_url) + for ann in anns: + if ann['ann_type'] == 'blacklist': + return False + if ann['ann_type'] == 'whitelist': + return True + # add missing site_ids + if site_id and any([ann['site_id'] is None for ann in anns]): + sql = "UPDATE site_annotation SET site_id=$1 WHERE base_url=$2" + await conn.execute(sql, site_id, base_url) + return None + + +async def process_site(fetcher, conn: Connection, site: Site): + """ + Process a site: fetch and store more information. + + Store external and internal links; find boilerplate texts; + fetch sitemaps; fetch feeds; update date of last publication. 
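+
+ Hypothetical glue code sketching where this sits in a full crawl
+ (checkout_site and checkin_site are defined in this module;
+ creating the Crawl row is elided here):
+
+ site, is_full, more = await checkout_site(app, conn)
+ if site and is_full:
+ await process_site(fetcher, conn, site)
+ await checkin_site(app, conn, site, crawl)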
+ """ + if not site.id_: # only to satisfy typing + return + if site.links_ext: + await _store_cross_site_links(conn, site.id_, site.links_ext) + if site.links_int: + paths = [] + for durl, (rel, _) in site.links_int.items(): + canon = (rel and rel.lower() == 'canonical') or None + paths.append((durl.pwa(), canon)) + await add_site_paths(conn, site.id_, paths) + + await store_boilerplate_texts(fetcher, conn, site) + + # get sitemaps and add their resources + robots = await RobotsInfo(site.base_url) # type: ignore + urls = await get_sitemap_urls( + fetcher, site.base_url, sitemaps=robots.site_maps + ) + paths_, latest = extract_sitemap_paths(site.base_url, urls) + await add_site_paths(conn, site.id_, paths_) + + # store feeds and their resources + await store_new_feeds(conn, site.id_, site.feeds) + latest_ = await fetch_feeds(fetcher, conn, site) + if latest_: + latest = max(latest or latest_, latest_) + + # update last_pub + if latest: + site.last_pub = latest + await site.save(conn) + + +async def checkin_site(app, conn: Connection, site: Site, crawl: Crawl): + """ + Unlock the site and schedule next crawl. + + *crawl* is the crawl that has just finished (regularly or stopped). + + If the crawl was stopped (t_end is None), just unlock the site. + + Otherwise schedule a crawl of the same type. After a full crawl + also a feed crawl is scheduled, if there was none scheduled. + """ + if crawl.t_end is None: + sql = "UPDATE site SET crawl_active=false WHERE id=$1" + await conn.execute(sql, site.id_) + elif crawl.is_full: + full_interval = app.config['crawl']['full_crawl_interval'] + feed_interval = app.config['crawl']['feed_crawl_interval'] + next_full_crawl = crawl.t_begin + timedelta(seconds=full_interval) + next_feed_crawl = crawl.t_begin + timedelta(seconds=feed_interval) + sql = ( + "UPDATE site SET crawl_active=false, next_full_crawl=$1," + " next_feed_crawl=coalesce(next_feed_crawl, $2) WHERE id=$3" + ) + await conn.execute(sql, next_full_crawl, next_feed_crawl, site.id_) + else: + feed_interval = app.config['crawl']['feed_crawl_interval'] + next_feed_crawl = crawl.t_begin + timedelta(seconds=feed_interval) + sql = ( + "UPDATE site SET crawl_active=false, next_feed_crawl=$1" + " WHERE id=$2" + ) + await conn.execute(sql, next_feed_crawl, site.id_) + + +async def _store_cross_site_links( + conn: Connection, + site_id: int, + links: dict[Durl, tuple[list[str], str]], +) -> None: + """ + Put outgoing links into site_link/site_queue for existing/unknown sites. + + Separate outgoing links from *site_id* into two classes: + (a) existing sites (rows in table site) and (b) unknown links. + Add links from class (a) to table site_link. + Add links from class (b) to table site_queue. 
+ """ + # add outgoing cross-site links for existing sites to table site_link + urls = [url.site() for url in links.keys()] + values = [] + sql = "SELECT id, unnest(base_urls) url FROM site WHERE base_urls && $1" + if rows := await conn.fetch(sql, urls): + for row in rows: + if (durl := await Durl(row['url'])) in links.keys(): + _, link_text = links.pop(durl) + if site_id != row['id']: + values.append((site_id, row['id'], link_text)) + sql = ( + "INSERT INTO site_link (src, dst, link_text)" + " VALUES ($1, $2, $3) ON CONFLICT (src, dst) DO NOTHING" + ) + await conn.executemany(sql, values) + + # add outgoing cross-site links for unknown sites to table site_queue + sql = "INSERT INTO site_queue (src, url, link_text) VALUES ($1, $2, $3)" + values = [ + (site_id, durl.site()[:200], link_text[:100]) + for durl, (_, link_text) in links.items() + ] + await conn.executemany(sql, values) diff --git a/src/atextcrawler/site/parse.py b/src/atextcrawler/site/parse.py new file mode 100644 index 0000000..e29b15b --- /dev/null +++ b/src/atextcrawler/site/parse.py @@ -0,0 +1,255 @@ +""" +Parsing of a site's startpage. +""" + +import re +from datetime import datetime +from typing import Any, Optional + +from ..models import Site, TextResource +from ..resource import feed_types +from ..utils.durl import Durl, get_ips +from ..utils.html import clean_html +from ..utils.lang import clean_lang +from ..utils.link import ( + extract_domain, + in_blacklist, + link_rels, + meta_names, + meta_props, +) + +re_meta_keyword_sep = re.compile('[,;\r\n]') + + +def cut_str(s: Optional[str], l: int) -> Optional[str]: + """ + Cut a string *s* to a maximal length *l* from the left. + """ + return s[:l] if s else None + + +async def parse_startpage( + startpage: TextResource, app=None, site=None +) -> Site: + """ + Parse a site's startpage and return a Site instance. + + If a site instance is given, update it. 
+ """ + durl = startpage.init_fields['durl'] + soup = startpage.init_fields['head'] + meta = collect_meta_tags(soup) + meta_links = await collect_meta_links(soup, durl) + links_ext = await collect_external_links(startpage, meta_links) + links_int = startpage.init_fields['links_int'] + langs = extract_languages(startpage, meta, meta_links) + title, description, keywords = extract_meta_texts(startpage, meta) + + # feeds + feeds = meta_links['feeds'] + if 'wordpress' in meta.get('generator', '').lower(): + url = durl.site() + 'feed/' + feeds[url] = 'application/rss+xml' + # TODO later: maybe also probe other possible feed paths 'rss', 'rss/' + + # network params (canonical_url, base_urls, domains) + ips = await get_ips(durl.hostname) + redirects = [] + for redirect in startpage.init_fields['redirects']: + redir_url = await Durl(redirect) + if redir_url: + redirects.append(redir_url.site()) + base_urls = redirects + [durl.url()] + domains = [extract_domain(durl.hostname)] + + if site: # update an existing Site + site.canonical_url = meta_links['canonical_url'] or site.canonical_url + site.base_urls = base_urls + site.domains = domains + site.ips = ips + site.last_update = datetime.utcnow() + site.last_pub = startpage.last_change + site.langs = langs + site.alt_langs = meta_links['alt_langs'] + site.title = title + site.description = description + site.keywords = keywords + site.linkbacks.update(meta_links['linkbacks']) + site.meta_info = meta + site.__post_init__( + base_durl=durl, + feeds=feeds, + links_ext=links_ext, + links_int=links_int, + startpage_text=startpage.search_fields['text'], + ) + else: # create new Site instance + site = Site( + # post_init fields + base_durl=durl, + feeds=feeds, + links_ext=links_ext, + links_int=links_int, + startpage_text=startpage.search_fields['text'], + # dataclass fields + canonical_url=meta_links['canonical_url'], + base_urls=base_urls, + domains=domains, + ips=ips, + last_update=datetime.utcnow(), + last_pub=startpage.last_change, + langs=list(langs), + alt_langs=meta_links['alt_langs'], + title=title, + description=description, + keywords=keywords, + linkbacks=meta_links['linkbacks'], + meta_info=meta, + ) + if site.ips is None and site.url: + site.ips = await get_ips(site.url.hostname) + if app and site.startpage_text: + site_filter = app.plugins['filter_site'].site_filter + site.crawl_enabled = await site_filter(site) + return site + + +def collect_meta_tags(soup): + """ + Collect selected meta tags (meta_names and meta_props) with their values. + """ + meta = {} + for tag in soup.find_all('meta'): + if (name := tag.get('name')) and name in meta_names: + meta[name] = tag.get('content') + if (property := tag.get('property')) in meta_props: + if content := tag.get('content'): + meta[property] = content + if tag.get('http-equiv') == 'content-language': # old html + if content := tag.get('content'): + meta['http_equiv_lang'] = content + return meta + + +async def collect_meta_links(soup, base_durl) -> dict[str, Any]: + """ + Collect link tags with site scope (feeds, linkbacks, canonical, ...). 
+ """ + linkbacks = {} + feeds = {} + alt_langs = {} + canonical_url = None + for tag in soup.find_all('link'): + if not (rels := set(tag.get('rel', []))) or not rels & link_rels: + continue + if not (url := tag.get('href')): + continue + if not (link_durl := await Durl(url, base=base_durl)): + continue + if in_blacklist(link_durl.hostname): + continue + link_url = link_durl.url() + link_type = tag.get('type') + if link_type in feed_types: + feeds[link_url] = link_type + elif 'canonical' in rels: + canonical_url = link_url + elif 'alternate' in rels and (hreflang := tag.get('hreflang')): + if lang := clean_lang(hreflang): + alt_langs[lang] = link_durl.url() + elif 'webmention' in rels: + linkbacks[link_url] = 'webmention' + elif 'pingback' in rels: + linkbacks[link_url] = 'pingback' + if canonical_url: + if canonical_durl := await Durl(canonical_url): + canonical_url = canonical_durl.site() + else: + canonical_url = None + return { + 'feeds': feeds, + 'linkbacks': linkbacks, + 'alt_langs': alt_langs, + 'canonical_url': canonical_url, + } + + +async def collect_external_links(startpage, meta_links) -> dict[str, str]: + """ + Return external links (mapping from URL to link text) from startpage. + + Also add links to alternate language variants of the site. + """ + external_links = startpage.init_fields['links_ext'].copy() + netloc = startpage.init_fields['durl'].netloc + for lang, lang_url in meta_links['alt_langs'].items(): + if netloc not in lang_url: + durl = await Durl(lang_url) + if durl: + external_links[durl] = f'Alternate language: {lang}' + return external_links + + +def extract_meta_texts(page, meta) -> tuple[str, Optional[str], list[str]]: + """ + Extract and return title, description, keywords from a page and meta tags. + """ + title = meta.get('og:site_name') + if not title: + title = page.search_fields['title'] or '' + if meta_title := meta.pop('title', None): + if meta_title.lower() not in title.lower(): + title += ('; ' if title else '') + meta_title + title = cut_str(clean_html(title), 200) + description = cut_str(clean_html(meta.pop('description', None)), 2000) + if meta_keywords := meta.pop('keywords', None): + kws = re_meta_keyword_sep.split(meta_keywords) + keywords = [kw.strip()[:50] for kw in kws if kw.strip()] + if len(keywords) < 2: + keywords = [ + kw.strip()[:50] + for kw in meta_keywords.split(' ') + if kw.strip() + ] + else: + keywords = [] + return title, description, keywords + + +def extract_languages(page, meta, meta_links) -> set[str]: + """ + Extract languages from a page's html tag, meta tags and HTTP headers. + + Also add the language detected in the text content of the page. + + Return a set of ISO 639-1 language codes. 
+ + See also https://www.w3.org/International/questions/qa-http-and-lang and + https://www.w3.org/International/questions/qa-html-language-declarations + """ + languages = set() + if lang := clean_lang(page.lang): + languages.add(lang) + if lang := clean_lang(meta.get('http_equiv_lang')): + languages.add(lang) + if lang := clean_lang(meta.get('dc.language')): + languages.add(lang) + if lang := clean_lang(meta.get('og:locale')): + languages.add(lang) + for lang, lang_url in meta_links['alt_langs'].items(): + if page.init_fields['durl'].netloc in lang_url: + if lng := clean_lang(lang): + languages.add(lng) + lngs = ( + page.init_fields['headers'] + .get('Content-Language', '') + .lower() + .replace(' ', '') + .split(',') + ) + for lng in lngs: + if lang := clean_lang(lng): + languages.add(lang) + languages.add(page.lang) + return languages diff --git a/src/atextcrawler/site/queue.py b/src/atextcrawler/site/queue.py new file mode 100644 index 0000000..1cf77d6 --- /dev/null +++ b/src/atextcrawler/site/queue.py @@ -0,0 +1,127 @@ +""" +Queue of sites. + +When processing a resource, its external links are put into database table +`site_queue`. +The items in `site_queue` are processed in :func:`process_site_queue`. +This is done baseURL by baseURL (see :func:`iter_site_queue`). +While doing this, cross-site links are put into table `site_link`. +""" + +import logging +from typing import AsyncIterator, Optional + +import aiohttp +from asyncpg import Connection + +from ..resource import ResourceFetcher +from .operations import update_site + +logger = logging.getLogger(__name__) + + +async def process_site_queue(app, pool): + """ + Loop over queued sites creating new sites and adding cross-site links. + """ + site_delay = app.config['crawl']['site_delay'] + resource_delay = app.config['crawl']['resource_delay'] + async with pool.acquire() as conn: + async with aiohttp.ClientSession() as session: + fetcher = ResourceFetcher(session) + while app.running: + async for base_url, links_from in iter_site_queue(app, conn): + # get or create site + msg = f'Site queue: updating {base_url}' + logger.debug(msg) + site, created = await update_site( + app, fetcher, conn, base_url + ) + if site: + await store_incoming_site_site_links( + conn, site.id_, links_from + ) + # delete handled queue items + sql = "DELETE FROM site_queue WHERE url=$1" + await conn.execute(sql, base_url) + await app.sleep(resource_delay) + logger.debug( + f'Queued sites exhausted, sleeping' + f' for {site_delay} seconds' + ) + await app.sleep(site_delay) + + +async def iter_site_queue( + app, conn: Connection +) -> AsyncIterator[tuple[str, dict[int, str]]]: + """ + Yield URLs with aggregated link information from site_queue. + + Yield a URL and a dict mapping ids of linking sites to link texts. 
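+
+ For example, if site_queue contains two rows with the same URL
+ (hypothetical values)
+
+ (src=3, url='https://example.org/', link_text='example')
+ (src=7, url='https://example.org/', link_text='an example site')
+
+ this yields ('https://example.org/', {3: 'example', 7: 'an example site'}).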
+ """ + site_revisit_interval = app.config['crawl']['site_revisit_interval'] + while app.running: + sql = ( + "SELECT url, array_agg(src) srcs," + " array_agg(link_text) link_texts" + " FROM site_queue GROUP BY url LIMIT 1" + ) + row = await conn.fetchrow(sql) + if row: + base_url = row['url'] + links_from = {} + srcs = row['srcs'] + link_texts = row['link_texts'] + for i in range(len(srcs)): + if src := srcs[i]: + links_from[src] = link_texts[i] + if site_id := await site_recently_updated( + conn, base_url, site_revisit_interval + ): + # just store incoming links and remove the site from the queue + await store_incoming_site_site_links(conn, site_id, links_from) + sql = "DELETE FROM site_queue WHERE url=$1" + await conn.execute(sql, base_url) + else: + yield base_url, links_from + else: + break + + +async def site_recently_updated( + conn: Connection, + base_url: str, + site_revisit_interval: float, +) -> Optional[int]: + """ + Return the id of the site with given base_url if it was updated recently. + """ + sql = ( + f"SELECT id FROM site WHERE $1=any(base_urls)" + f" AND last_update + interval '{site_revisit_interval} seconds'" + f" > now() at time zone 'utc' LIMIT 1" + ) + site_id = await conn.fetchval(sql, base_url) + return site_id + + +async def store_incoming_site_site_links( + conn: Connection, site_id: int, links_from: dict +): + """ + Store incoming site-site links (irrespective of crawl_enabled). + + *site_id* is the id of the site to which the links in *links_from* point. + """ + sql = ( + "INSERT INTO site_link" + " (src, dst, link_text) VALUES ($1, $2, $3)" + " ON CONFLICT (src, dst) DO NOTHING" + ) + values = [ + (from_id, site_id, link_text) + for from_id, link_text in links_from.items() + if from_id != site_id + ] + await conn.executemany(sql, values) diff --git a/src/atextcrawler/site/robots.py b/src/atextcrawler/site/robots.py new file mode 100644 index 0000000..ff8f77c --- /dev/null +++ b/src/atextcrawler/site/robots.py @@ -0,0 +1,98 @@ +""" +Fetch and evaluate a website's robots.txt. +""" + +import logging +from typing import Optional, Union +from urllib.robotparser import RobotFileParser + +import aiohttp + +logger = logging.getLogger(__name__) + + +class RobotsInfo(RobotFileParser): + """ + Obtain information from a site's robots.txt. + + After instantiation you must await :meth:`startup`. + """ + + def __init__( + self, + site_url: str, + user_agent: str = '*', + session: aiohttp.ClientSession = None, + ): + super().__init__() + self.__user_agent = user_agent + self.__site_url = site_url.rstrip('/') + self.__robots_url = self.__site_url + '/robots.txt' + self.__timeout = aiohttp.ClientTimeout(sock_connect=2, sock_read=3) + self.__session = session + + def __await__(self): + return self.__ainit__().__await__() + + async def __ainit__(self): + if self.__session: + content = await self.__get_robots_txt(self.__session) + else: + async with aiohttp.ClientSession() as session: + content = await self.__get_robots_txt(session) + self.parse(content.splitlines()) + self.__delay = self.crawl_delay(self.__user_agent) + request_rate = self.request_rate(self.__user_agent) + if request_rate: + self.__delay = request_rate.seconds / request_rate.requests + self.__site_maps = super().site_maps() or [] + return self + + async def __get_robots_txt(self, session: aiohttp.ClientSession) -> str: + """ + Fetch and return the robots.txt over http. 
+ """ + try: + async with session.get( + self.__robots_url, timeout=self.__timeout + ) as resp: + if resp.status == 200: + try: + content = await resp.text() + except: + body = await resp.read() + content = body.decode( + resp.charset or 'utf-8', errors='ignore' + ) + else: + content = '' + except aiohttp.ClientError: + content = '' + return content + + @property + def user_agent(self) -> str: + """ + The user agent being used. + """ + return self.__user_agent + + @property + def delay(self) -> Optional[Union[int, float]]: + """ + The delay to be used between requests. + """ + return self.__delay + + @property + def site_maps(self) -> list[str]: + """ + The list of sitemaps of the site. + """ + return self.__site_maps + + def can_fetch_url(self, url: str) -> bool: + """ + Return whether fetching of the given *url* is allowed. + """ + return super().can_fetch(self.__user_agent, url) diff --git a/src/atextcrawler/site/seed.py b/src/atextcrawler/site/seed.py new file mode 100644 index 0000000..0648b7f --- /dev/null +++ b/src/atextcrawler/site/seed.py @@ -0,0 +1,72 @@ +""" +Seeding of new installations with URLs from blacklists and whitelists. +""" + +from pathlib import Path + +import asyncpg + +from ..utils.durl import Durl + + +async def load_seeds(config: dict, pool: asyncpg.Pool) -> None: + """ + Add seed file contents (site blacklist and whitelist). + + If there are sites already, do nothing. + """ + async with pool.acquire() as conn: + site_count = await conn.fetchval("SELECT count(*) FROM site") + if site_count: + return + + # add blacklist entries + values = [] + blacklist = _load_list(config['config_dir'], 'black') + for base_url in blacklist: + durl = await Durl(base_url) + if durl: + url = durl.site() + values.append((url, {'source': 'seed file'})) + sql = ( + "INSERT INTO site_annotation (base_url, ann_type, ann_content)" + " VALUES ($1, 'blacklist', $2)" + ) + await conn.executemany(sql, values) + + # add whitelist entries + values1 = [] + values2 = [] + whitelist = _load_list(config['config_dir'], 'white') + for base_url in whitelist: + durl = await Durl(base_url) + if durl: + url = durl.site() + if url not in blacklist: + values1.append((url, {'source': 'seed file'})) + values2.append((url,)) + sql = ( + "INSERT INTO site_annotation (base_url, ann_type, ann_content)" + " VALUES ($1, 'whitelist', $2)" + ) + await conn.executemany(sql, values1) + sql = "INSERT INTO site_queue (src, url) VALUES (null, $1)" + await conn.executemany(sql, values2) + + +def _load_list(config_dir, black_white): + """ + Load the seed black or white list. + """ + path = Path(config_dir) / 'initial_data' / f'seed_urls.list' + with open(path, 'r') as list_file: + urls = [] + for line in list_file.read().strip().splitlines(): + line_ = line.strip() + if line_.startswith('#'): + continue + if black_white == 'black' and line_.startswith('-'): + urls.append(line_[1:].strip()) + if black_white == 'white' and line_.startswith('+'): + urls.append(line_[1:].strip()) + return urls diff --git a/src/atextcrawler/tensorflow.py b/src/atextcrawler/tensorflow.py new file mode 100644 index 0000000..197572c --- /dev/null +++ b/src/atextcrawler/tensorflow.py @@ -0,0 +1,69 @@ +""" +Query the tensorflow_model_server's REST API. +""" + +import logging +from typing import Optional, Union + +import aiohttp + +logger = logging.getLogger(__name__) + + +class TensorFlow: + """ + Fetch an embedding vector from the tensorflow model server. 
+ """ + + def __init__( + self, + app, + session: aiohttp.ClientSession, + timeout_sock_connect: Union[int, float] = 0.5, + timeout_sock_read: Union[int, float] = 10, + ): + self.config = app.config['tensorflow'] + self.session = session + self.timeout = aiohttp.ClientTimeout( + sock_connect=timeout_sock_connect, sock_read=timeout_sock_read + ) + + async def embed( + self, text: Union[str, list[str]] + ) -> Optional[Union[list[float], list[list[float]]]]: + """ + Query the tensorflow_model_server's REST API for a prediction. + + Take a string or a list of strings and return an embedding vector + or a list of embedding vectors. + + If the request fails or times out, return None. + """ + text_ = text if isinstance(text, list) else [text] + data = {'signature_name': 'serving_default', 'instances': text_} + try: + async with self.session.post( + self.config['model_server_endpoint'], + json=data, + timeout=self.timeout, + ) as resp: + try: + res = await resp.json() + if isinstance(text, list): + return res.get('predictions') + else: + return res.get('predictions')[0] + except: + msg = 'Got invalid response from tensorflow' + logger.error(msg) + return None + except Exception as err: + msg = 'Could not get embedding from tensorflow for ' + if isinstance(text, str): + msg += f'string of length {len(text)}' + else: + msg += 'list of strings with lengths ' + msg += ','.join([str(len(s)) for s in text]) + msg += f', reason: {err}' + logger.error(msg) + return None diff --git a/src/atextcrawler/utils/__init__.py b/src/atextcrawler/utils/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/atextcrawler/utils/annotation.py b/src/atextcrawler/utils/annotation.py new file mode 100644 index 0000000..24ca149 --- /dev/null +++ b/src/atextcrawler/utils/annotation.py @@ -0,0 +1,481 @@ +""" +Convert html to plain text with annotations over character ranges. +""" + +import re +from collections import defaultdict +from html.parser import HTMLParser + +from .json import json_dumps, json_loads +from .link import nofollow_link_rels +from .tag import keep_tags, self_closing_tags + +MAX_HREF_LENGTH = 200 +""" +Maximum length of an href. Other links are discarded. +""" + + +text_blacklist = [ + 'previous', + 'next', + 'back', # common pagination navigation + '↩︎', # amusewiki footnote separator (after conversion from muse to html) +] +""" +Texts to ignore. +""" + + +class AnnotatingParser(HTMLParser): + """ + Parse tagged text resulting in pure text and annotations. + + The text is available in self.text and the annotations + in self.annotations, which is a dict with these keys: + + * tags: contains a mapping of offset ranges (i, f) to + the tags opening at i and closing at f + * semantic_breaks: a mapping of offset positions where + a new section begins to the nesting level of that + sections; a section is whereever an (opening or closing) + separating tag is placed in the raw html; for the + separating flag of tags see tag.py + * links: a mapping of hrefs to link texts obtained from + anchor (a) tags; we skip hyperref with nofollow rels + * section_ids: map an offset position to the first + id attribute (of any tag) at the beginning of a + semantic section; this can later be used in a URL + fragment for linking directly into this section + + Internally, we put opening tags on self.stack and pop them + when the first matching closing tag is encountered. We assume + balanced tags (tidy html). 
+
+ NB: all tags with semantic breaks have sep=True, i.e.,
+ they will have spaces around them so that the semantic breaks
+ always sit on a space; the semantic break position p is the end
+ of the last section and the next section begins at p + 1.
+
+ The text always begins with a ' ' (added if not in the original),
+ which is assigned a semantic break with default level 80
+ (if there is no semantic break tag at the beginning).
+ """
+
+ def __init__(self, *args, **kwargs):
+ super().__init__(*args, **kwargs)
+ self.text = ' ' # concatenated text data (without tags)
+ self.pos = 1 # equal to len(self.text)
+ self.stack = []
+ self.tags = defaultdict(dict)
+ self.semantic_breaks = {0: 80}
+ self.tag_id = None
+ self.section_ids = defaultdict(list)
+ self.links = {}
+ self.add_space = False
+
+ def close(self):
+ """
+ Finish by collecting results in dict `self.annotations`.
+ """
+ super().close()
+ self.annotations = {}
+ self.annotations['links'] = self.links
+ self.annotations['semantic_breaks'] = {
+ pos: lvl for pos, lvl in sorted(self.semantic_breaks.items())
+ }
+ self.annotations['tags'] = self.tags
+ self.annotations['section_ids'] = self.section_ids
+
+ def handle_starttag(self, tag, attrs):
+ """
+ Called for each opening tag.
+ """
+ sep, lvl, sem = keep_tags[tag]
+ attrs = dict(attrs)
+ if sep:
+ self.add_space = True
+ if tag == 'section' and 'endnotes' in attrs.get('role', ''):
+ lvl = 25
+ # ARIA roles
+ if role := attrs.get('role'):
+ if role == 'article':
+ lvl = 15
+ elif role == 'heading':
+ if aria_level := attrs.get('aria-level'):
+ # NB: attribute values are strings
+ if aria_level in ('1', '2', '3', '4', '5', '6'):
+ sep, lvl, sem = keep_tags[f'h{aria_level}']
+ elif role == 'region':
+ lvl = 24
+ i = self.pos
+ if tag in self_closing_tags:
+ # self-closing tags will not be added to the result tags,
+ # they only appear in semantic_breaks
+ # the two self-closing tags br and hr both have lvl and sep
+ if i == 1: # replace the default semantic break at pos 0
+ i = 0
+ self.add_semantic_break(i, lvl)
+ i += 1
+ if tag_id := attrs.get('id'):
+ self.tag_id = i, tag_id
+ self.add_tag_id(i) # br or hr may have an id, too
+ self.add_space = True
+ else:
+ self.stack.append((i, tag, sep, lvl, sem, attrs))
+ # forget outdated tag id at new semantic break
+ if lvl:
+ self.forget_tag_id()
+ # memorize tag id
+ if not self.tag_id and (tag_id := attrs.get('id')):
+ self.tag_id = self.pos, tag_id
+
+ def handle_endtag(self, tag):
+ """
+ Called for each closing tag.
+ """ + if not self.stack or (self.stack and self.stack[-1][1] != tag): + return # nothing to do for an already closed self-closing tag + i, tag_, sep, lvl, sem, attrs = self.stack.pop() + f = self.pos + # omit tag without content + if i == f: + return + # for a closing div tag revise lvl to minimum level of contained + # semantic breaks (if any) + if tag == 'div': + min_lvl = 101 + for pos_, lvl_ in reversed(self.semantic_breaks.items()): + if pos_ <= i: + break + min_lvl = min(min_lvl, lvl_) + if min_lvl < 101: + lvl = min_lvl + # add semantic break and an optional section_id + if lvl: + if i == 1: # replace the default semantic break at pos 0 + i = 0 + if tag in ('ul', 'ol', 'li'): + seen_tags = [x[1] for x in self.stack] + if 'p' not in seen_tags: + lvl = 52 + seen_tags.count('tag') + if tag == 'li': + lvl += 1 + self.add_semantic_break(i, lvl) + self.add_tag_id(i) + # do not include surrounding spaces in tag span + if self.text[i] == ' ': + i += 1 + # add tag + self.tags[(i, f)][tag] = sem + # add space (when handling next data) + if sep: + self.add_space = True + # collect links + if tag == 'a': + self.extract_link(i, attrs) + + def handle_data(self, text): + """ + Called for each non-tag content between tags. + """ + # handle empty or blacklisted text + if text == '': + return + if text == ' ': + self.add_space = True + return + if text.strip().lower() in text_blacklist: + if ' ' in text: + self.add_space = True + return + # add a space (at self.pos) if the text begins with one + # or if we shall add one + startswith_space = text.startswith(' ') + text = text.lstrip() + if startswith_space or self.add_space: + if self.text[-1] != ' ': + self.text += ' ' + self.pos += 1 + self.add_space = False + # strip a space at the end of text and handle it in end tag + if text.endswith(' '): + text = text[:-1] + self.add_space = True + # add text to self.text + self.text += text + self.pos += len(text) + + def add_semantic_break(self, pos, lvl): + """ + Add a semantic break of level *lvl* at position *pos*. + """ + if pos in self.semantic_breaks: + self.semantic_breaks[pos] = min(self.semantic_breaks[pos], lvl) + else: + self.semantic_breaks[pos] = lvl + + def forget_tag_id(self): + """ + Reset a tag id if it is too far behind in the text stream. + """ + if self.tag_id: + pos_, tag_id = self.tag_id + if pos_ + 200 < self.pos: + self.tag_id = None + + def add_tag_id(self, pos): + """ + Add and clear an id if the just closing section has none yet. + + *pos* is the start position of the current section, and the + position where the id will be added. + + Add an id only if we are not too far in the section's text already. + """ + if self.tag_id: + pos_, tag_id = self.tag_id + if pos_ < pos + 100 and pos not in self.section_ids: + self.section_ids[pos].append(tag_id.lower()) + self.tag_id = None + + def extract_link(self, i, attrs): + """ + Add a link covering character range (i, self.pos). + + From html *attrs* extract href and rel. + """ + if (href := attrs.get('href')) and not attrs.get('rel') == 'nofollow': + if href.startswith('#'): + return + if len(href) > MAX_HREF_LENGTH: + return + attrs.get('title', '') + if rel := attrs.get('rel'): + if set(rel) & nofollow_link_rels: + return + self.links[href] = i, self.pos, rel + + +def annotate(html): + """ + Split html text into plain text with annotations (from AnnotatingParser). 
+ """ + parser = AnnotatingParser() + parser.reset() + parser.feed(html) + parser.close() + return parser.text, parser.annotations + + +re_footnote = re.compile(r'^\s*\[\d+\]\s+') + + +def headline_probability(text, tags, lvl) -> float: + """ + Estimate the probability that the text with tags is a headline. + + The context is not considered: The question is not whether the + text is a headline for the following text. + """ + text = text.strip() + res = 0.0 + if not text: + return res + if lvl < 60: + return 1.0 + # if 'h1' in tags or 'h2' in tags or 'h3' in tags or\ + # 'h4' in tags or 'h5' in tags or 'h6' in tags or 'center' in tags: + # return 1.0 + if len(text) < 80: + res = 0.7 + else: + res = 0.7 - 0.7 * (len(text) - 80) / 200 + if 'p' in tags: + res -= 0.4 + if 'em' in tags: + res += 0.3 + if 'a' in tags: + res -= 0.1 + if text[-1] in '.:': + res -= 0.3 + res -= 0.1 * text.count(', ') + if re_footnote.match(text): + res -= 0.4 + return max(res, 0.0) + + +def get_tag_counts(tag_names, i, f, tags, text) -> tuple[int, float, float]: + """ + Return the info on the share of characters covered with one of the *tags*. + + Only consider the characters between i and f of string *text*. + + Return the number of tags that have an overlap in the specified region, + the tag density in the region (fraction of covered characters by all), + and the average number of covered chars per tag. + + NB: If more than one tag name is given, then the fractional share + may exceed 1. + """ + if i == f: + return 0, 0.0, 0.0 + tag_count = 0 + covered_chars = 0 + for (s_i, s_f), anns in tags.items(): + if overlap := range_overlap(i, f - 1, s_i, s_f - 1): + for ann in anns: + if ann in tag_names: + tag_count += 1 + covered_chars += overlap[1] - overlap[0] + all_chars = f - i + tag_density = covered_chars * 1.0 / all_chars + avg_text_len = covered_chars * 1.0 / tag_count if tag_count else 0 + return tag_count, tag_density, avg_text_len + + +def range_overlap(i1, f1, i2, f2): + """ + Return the overlap of both ranges (None if there is none). + """ + return None if f1 <= i2 or f2 <= i1 else (max(i1, i2), min(f1, f2)) + + +def annotations_remove_section(annotations, i, f): + """ + Remove section (i, f) from annotations and return result. 
+ """ + new_annotations = {} + d = f - i + if not d: + return annotations + + # relocate tags + new_tags = {} + for (t_i, t_f), anns in annotations['tags'].items(): + n_i, n_f = cut_range(i, f, d, t_i, t_f) + if n_i is not None: + new_tags[(n_i, n_f)] = anns + new_annotations['tags'] = new_tags + + # relocate links + new_links = {} + for href, (l_i, l_f, rel) in annotations['links'].items(): + n_i, n_f = cut_range(i, f, d, l_i, l_f) + if n_i is not None: + new_links[href] = n_i, n_f, rel + + # relocate semantic breaks and section_ids + semantic_breaks = annotations['semantic_breaks'] + section_ids = annotations['section_ids'] + new_semantic_breaks = {} + new_section_ids = {} + for pos in sorted(semantic_breaks.keys()): + level = semantic_breaks[pos] + if i <= pos and pos < f: + continue # discard + elif f <= pos: + new_semantic_breaks[pos - d] = level + if pos in section_ids: + new_section_ids[pos - d] = section_ids[pos] + else: + new_semantic_breaks[pos] = level + if pos in section_ids: + new_section_ids[pos] = section_ids[pos] + + # collect and return results + new_annotations['semantic_breaks'] = new_semantic_breaks + new_annotations['section_ids'] = new_section_ids + new_annotations['links'] = new_links + return new_annotations + + +def cut_range(i, f, d, t_i, t_f): + """ + Return the new coordinates of a text range (t_i,t_f) after cutting (i,f). + + If (t_i,t_f) is fully within (i,f), return None, None. + """ + if t_f < i: + return t_i, t_f + elif t_i < i <= t_f <= f: + return t_i, i + elif t_i < i and f <= t_f: + return t_i, t_f - d + elif i <= t_i and t_f <= f: + return None, None + elif i <= t_i <= f < t_f: + return i, t_f - d + else: # f < t_i + return t_i - d, t_f - d + + +def clean_annotations(annotations: dict) -> None: + """ + Remove void stuff from annotations. + """ + cleaned_tags = {} + for (i, f), anns in annotations['tags'].items(): + if f > i and anns: + cleaned_tags[(i, f)] = anns + annotations['tags'] = cleaned_tags + + +def pack_annotations(annotations): + """ + Pack annotations to a special JSON string, reducing their volume a little. + """ + return json_dumps( + { + 'tags': _pack_tags(annotations['tags']), + 'semantic_breaks': ','.join( + [ + f'{pos}:{level}' + for pos, level in annotations['semantic_breaks'].items() + ] + ), + 'section_ids': annotations['section_ids'], + 'links': annotations['links'], + } + ) + + +def _pack_tags(tags: dict) -> str: + """ + Utility function for packing tag information into a string. + """ + res = '' + for (i, f), anns in tags.items(): + if anns: + anns_ = ','.join([f'{tag}={sem}' for tag, sem in anns.items()]) + res += f'{i}-{f}:{anns_}\n' + return res + + +def unpack_annotations(json_text: str) -> dict: + """ + Unpack tag information from a string. 
+ """ + annotations = json_loads(json_text) + tags = {} + for line in annotations['tags'].split('\n'): + if line: + range_, anns_ = line.split(':') + i, f = range_.split('-') + i = int(i) + f = int(f) + anns = {} + if anns_: + for ann_ in anns_.split(','): + tag_, sem_ = ann_.split('=') + anns[tag_] = sem_ + tags[(i, f)] = anns + semantic_breaks = {} + for sb_ in annotations['semantic_breaks'].split(','): + pos_, lvl_ = sb_.split(':') + semantic_breaks[int(pos_)] = int(lvl_) + return { + 'tags': tags, + 'semantic_breaks': semantic_breaks, + 'section_ids': annotations['section_ids'], + 'links': annotations['links'], + } diff --git a/src/atextcrawler/utils/date_finder.py b/src/atextcrawler/utils/date_finder.py new file mode 100644 index 0000000..a787d2b --- /dev/null +++ b/src/atextcrawler/utils/date_finder.py @@ -0,0 +1,90 @@ +""" +Find date expressions in a string. +""" + +import re +from datetime import datetime +from typing import Optional + +p_day = r'(0?[1-9]|[12][0-9]|3[01])' +p_month = r'(0?[1-9]|1[0-2])' +p_year = r'(20\d\d|19\d\d)' +sep = r'\D{1,2}' +p_t = r'(\D{0,4}([01][0-9]|2[0-3]):([0-5][0-9]))?' + + +format_re = { + 'iso': ( + re.compile(f'(^|\\D){p_year}{sep}{p_month}{sep}{p_day}(\\D{p_t}|$)'), + (1, 2, 3, 6, 7), + ), + 'dmy': ( + re.compile(f'(^|\\D){p_day}{sep}{p_month}{sep}{p_year}(\\D{p_t}|$)'), + (3, 2, 1, 6, 7), + ), + 'mdy': ( + re.compile(f'(^|\\D){p_month}{sep}{p_day}{sep}{p_year}(\\D{p_t}|$)'), + (3, 1, 2, 6, 7), + ), +} + + +lang_format = { + 'de': ('iso', 'dmy'), + 'en': ('iso', 'mdy'), + None: ('iso', 'dmy', 'mdy'), +} + + +def extract_latest_date(text: str, lang: str = None) -> Optional[datetime]: + """ + Extract the latest date compatible with the *lang* from *text*. + + Only consider dates in the past. + """ + dates = extract_dates(text, lang=lang) + return max(dates) if dates else None + + +def extract_dates(text: str, lang: str = None) -> list[datetime]: + """ + Extract dates form a string, optionally limiting formats to a language. + """ + dates = [] + fmts = lang_format.get(lang, lang_format[None]) + for fmt in fmts: + re_, slots = format_re[fmt] + matches = re_.findall(text) + if matches: + for match in matches: + try: + date = datetime( + int(match[slots[0]]), + int(match[slots[1]]), + int(match[slots[2]]), + int(match[slots[3]] or 0), + int(match[slots[4]] or 0), + ) + if date <= datetime.utcnow(): + dates.append(date) + except: + pass + return dates + + +## from htmldate import find_date + +# def extract_last_pub(html): +# """ +# Return an estimate for the time of last content publication from html. +# """ +# # https://stackoverflow.com/questions/57833080/how-to-fix-unicode-strings-with-encoding-declaration-are-not-supported +# lxml_tree = lxml_html.fromstring(bytes(html, encoding='utf8')) +# # publication date (from startpage) +# try: +# date_string = find_date(lxml_tree) +# pd = date.fromisoformat(date_string) +# last_pub = datetime(pd.year, pd.month, pd.day, 12, 0, 0) +# except: +# last_pub = None +# return last_pub diff --git a/src/atextcrawler/utils/durl.py b/src/atextcrawler/utils/durl.py new file mode 100644 index 0000000..7837d68 --- /dev/null +++ b/src/atextcrawler/utils/durl.py @@ -0,0 +1,278 @@ +""" +Hyperlink parsing. 
+""" + +import logging +from typing import Optional +from urllib.parse import urlsplit + +import tldextract +from async_dns import types +from async_dns.resolver import ProxyResolver +from async_lru import alru_cache + +from .link import in_blacklist + +logger = logging.getLogger(__name__) + + +resolver = ProxyResolver(request_timeout=2) + + +async_dns_logger = logging.getLogger('async_dns') +async_dns_logger.setLevel(logging.WARNING) + + +extract = tldextract.TLDExtract(cache_dir=False) + + +# tldextract uses filelock; set its loglevel to warning +filelock_logger = logging.getLogger('filelock') +filelock_logger.setLevel(logging.WARNING) + + +class Durl: + """ + Decomposed URL, contains :class:`urllib.parse.SplitResult`. + + When constructing this class, it has to be awaited, e.g.: + + my_durl = await Durl('http://www.example.com/whatever') + + The given URL will be decomposed, validated and normalized. + If the URL is invalid, we return None instead of an instance. + + If the given *base* is None, the URL must be absolute and + the hostname must be valid (DNS lookup). + + If the given URL is not absolute, an already decomposed (and thus + valid) *base* Durl must be given; otherwise the URL is invalid. + + The *base* Durl can contain a path (but no arguments or fragments), + in which case the URL - if not absolute - must begin with this path. + + The scheme must be http or https. If the URL begins with '//', + 'http:' is prepended. + + If the hostname is longer than 90 characters, the URL is invalid. + + Default port numbers (80 for http, 443 for https) are removed. + + The hostname is changed to lower case. Spaces in the hostname + make the URL invalid. + + URL fragments are removed. + """ + + _url = None + _base = None + _match_base = False + + def __init__( + self, + url: str, + base: Optional['Durl'] = None, + match_base: bool = False, + ): + self._url = url + self._base = base + self._match_base = match_base + + def __await__(self): + return self.__ainit__().__await__() + + async def __ainit__(self): + res = None + try: + # add missing scheme for urls beginning with '//' + if self._url.startswith('//'): + self._url = 'http:' + self._url + # split the url + durl = urlsplit(self._url) + # remove default port numbers 80, 443 + netloc = durl.netloc + if durl.port == 80 and durl.scheme == 'http': + netloc = netloc.removesuffix(str(durl.port)).rstrip(':') + if durl.port == 443 and durl.scheme == 'https': + netloc = netloc.removesuffix(str(durl.port)).rstrip(':') + if durl.hostname and durl.hostname != durl.netloc.lower(): + user_pass = '' + if durl.username and durl.password: + user_pass = f'{durl.username}:{durl.password}@' + port = '' + if durl.port: + port = f':{durl.port}' + netloc = f'{user_pass}{durl.hostname.lower()}{port}' + durl = durl._replace(netloc=netloc) + + if self._base: + # if missing fill in scheme and netloc from base + if not durl.scheme: + durl = durl._replace(scheme=self._base.scheme) + if not durl.netloc: + durl = durl._replace(netloc=self._base.netloc) + # if match_base, then set res only if the + # url is compatible with base url + if not self._match_base: + res = durl + else: + if durl.netloc == self._base.netloc: + if durl.scheme == self._base.scheme: + if self._base.path not in ('/', ''): + if durl.path.startswith(self._base.path): + res = durl + else: + res = durl + else: + res = durl + except: + logger.exception( + f'Durl init failed url={self._url}' + f' base={self._base} match_base={self._match_base}' + ) + res = None + if res: + res = 
res._replace(fragment='')
+ if not res.hostname or len(res.hostname) > 90:
+ res = None
+ elif res.scheme not in ('https', 'http'):
+ res = None
+ elif ' ' in res.hostname or '.' not in res.hostname:
+ res = None
+ elif not (await get_ips(res.hostname)):
+ res = None
+ elif not res.path.startswith('/'):
+ res = res._replace(path='/')
+ if res:
+ self._durl = res
+ return self
+ self._durl = None
+
+ def __getattr__(self, attr):
+ return getattr(self._durl, attr)
+
+ def url(self) -> str:
+ """
+ Return the URL as string.
+ """
+ return self._durl.geturl()
+
+ def pwa(self) -> str:
+ """
+ Return the (base-relative) path with args of the Durl.
+ """
+ if self._base and self._match_base:
+ path = self._durl.path.removeprefix(self._base.path)
+ else:
+ path = self._durl.path
+ qs = f'?{self._durl.query}' if self._durl.query else ''
+ return f'{path}{qs}'.lstrip('/')
+
+ def has_path(self) -> bool:
+ """
+ Return whether the Durl has a non-trivial path.
+ """
+ return self._durl.path not in ('/', '')
+
+ def site(self) -> str:
+ """
+ Return the site (base_url).
+ """
+ return f'{self._durl.scheme}://{self._durl.netloc}/'
+
+ def domain(self) -> str:
+ """
+ Return the domain of the Durl (wrong in case of second-level domains).
+ """
+ levels = extract(self._durl.hostname)
+ return '.'.join(levels[-2:]).lower()
+
+ def replace_scheme(self, scheme: str) -> None:
+ """
+ Replace the scheme (must be 'http' or 'https').
+ """
+ self._durl = self._durl._replace(scheme=scheme)
+
+
+@alru_cache(maxsize=1000)
+async def get_ips(hostname: str) -> set[str]:
+ """
+ Return IPv4 and IPv6 addresses of the given hostname.
+ """
+ ips = set()
+ for type_ in (types.A, types.AAAA):
+ try:
+ res, cached = await resolver.query(hostname, type_)
+ if res:
+ if addr := res.get_record([type_]):
+ ips.add(addr.data)
+ except:
+ pass
+ return ips
+
+
+def get_url_variants(url: str) -> list[str]:
+ """
+ Return variants of the URL.
+
+ Replace http with https and vice versa;
+ prepend or remove 'www.' to or from the beginning of the hostname.
+ """
+ if url.startswith('http://www.'):
+ s = url.removeprefix('http://www.')
+ return [url, f'http://{s}', f'https://www.{s}', f'https://{s}']
+ elif url.startswith('http://'):
+ s = url.removeprefix('http://')
+ return [url, f'http://www.{s}', f'https://www.{s}', f'https://{s}']
+ elif url.startswith('https://www.'):
+ s = url.removeprefix('https://www.')
+ return [url, f'https://{s}', f'http://www.{s}', f'http://{s}']
+ elif url.startswith('https://'):
+ s = url.removeprefix('https://')
+ return [url, f'https://www.{s}', f'http://www.{s}', f'http://{s}']
+ else:
+ return [url]
+
+
+async def assort_links(
+ links: dict[str, tuple[int, int, list[str]]],
+ durl: Durl,
+ text: str,
+ base_url: str = None,
+) -> tuple[
+ dict[str, tuple[int, int, list[str]]],
+ dict[Durl, tuple[list[str], str]],
+ dict[Durl, tuple[list[str], str]],
+]:
+ """
+ Sort links into a cleaned, an internal and an external dict.
+
+ The cleaned dict maps absolute URLs to char ranges and relations.
+ The internal dict maps absolute URLs to relations and the linked text.
+ The external dict maps absolute URLs to relations and the linked text.
+ The relations are link relations, e.g. rel="canonical".
+
+ If *base_url* is set, it is used to distinguish internal and external
+ links. If it is not set, the base_url is obtained from *durl*.
+ """ + res_int = {} + res_ext = {} + if not base_url: + base_url = durl.site().lower() + base_durl = await Durl(base_url) + cleaned_links = {} + for href, (i, f, rel) in links.items(): + durl = await Durl(href, base=base_durl) + if not durl: + continue + if durl.hostname and in_blacklist(durl.hostname): + continue + cleaned_links[durl.url()] = i, f, rel + txt = text[i:f] + if durl.site().lower() == base_url: + res_int[durl] = rel, txt + else: + res_ext[durl] = rel, txt + return cleaned_links, res_int, res_ext diff --git a/src/atextcrawler/utils/html.py b/src/atextcrawler/utils/html.py new file mode 100644 index 0000000..0cfd800 --- /dev/null +++ b/src/atextcrawler/utils/html.py @@ -0,0 +1,136 @@ +""" +Utilities for extracting information from html. +""" + +import re +from html import unescape +from typing import Optional + +from bs4 import BeautifulSoup + +from .lang import clean_lang +from .tag import drop_roles, drop_tags, keep_tags + +re_ = { + 'html_lang': re.compile( + ']*lang\s*=\s*["\']([^"\']*)["\'][^>]*>', re.I | re.S + ), + 'title': re.compile(']*>([^<]*)', re.I | re.S), + 'strip': re.compile( + '<(?!/?(' + '|'.join(keep_tags.keys()) + ')[ >])[^>]+>', re.I | re.S + ), + 'empty_tag': re.compile(r'<(?P\w+)( [^>]*)?>(\s*)', re.S), + 'whitespace': re.compile('(\s| )+', re.S), + 'whitespace_': re.compile('\s| ?'), # allow broken   + 'whitespace_near_tag': re.compile( + '\s*<(br|p|/p|ul|/ul|li|/li|h1|/h1' + '|h2|/h2|h3|/h3|h4|/h4|h5|/h5|h6|/h6)>\s*', + re.S, + ), + 'whitespace_tag_tag': re.compile('(\s+)((<[^>]+>\s+)+)', re.S), + 'whitespace_tag_tag_func': re.compile('(<[^>]+>)\s+', re.S), + 'http_equiv': re.compile('(]*http-equiv[^>]*>)', re.I | re.S), +} + + +def whitespace_tag_tag(match_obj): + """ + Helper function for removing whitespace between tags. + """ + return ' ' + re_['whitespace_tag_tag_func'].sub(r'\1', match_obj.group(2)) + + +def clean_html(s: Optional[str]) -> Optional[str]: + """ + Clean an html string. + + Unescape htmlentities and replace whitespaces with ' ' (ASCII char 0x20). + + See also: https://www.lesinskis.com/python-unicode-whitespace.html + """ + return re_['whitespace_'].sub(' ', unescape(s)).strip() if s else None + + +def get_html_lang(html: str) -> Optional[str]: + """ + Return the language, if any, found in the lang attribute of the html tag. + """ + m = re_['html_lang'].search(html) + return clean_lang(m.group(1)) if m else None + + +def extract_title(html: str) -> Optional[str]: + """ + Extract title tags from html returning their content as a string. + """ + if not (titles := re_['title'].findall(html)): + return None + titles = [clean_html(title) for title in reversed(titles) if title] + return ' - '.join(titles).strip(' |') + + +def clean_page(html): + """ + Remove unwanted tags including their content from html. + + Drop tags in *drop_tags* as well as tags with a role in *drop_roles*. + Also drop tags with attribute aria-hidden=true. + + Return a beautiful soup. + """ + soup = BeautifulSoup(html, 'html.parser') + for tag in drop_tags: + for n in soup.find_all(tag): + n.decompose() + for n in soup.find_all(attrs={'aria-hidden': 'true'}): + n.decompose() + for role in drop_roles: + for n in soup.find_all(attrs={'rel': role}): + n.decompose() + return soup + + +def clean_body(body): + """ + Clean an html body. + + Remove unwanted tags (keeping their content); remove empty tags; + remove and replace whitespaces in several ways. + + In the end the only whitespace is a space and there are no + consecutive spaces. 
+ """ + body = re_['strip'].sub(' ', body) + body = re_['whitespace_near_tag'].sub(r'<\1>', body) + body = re_['whitespace'].sub(' ', body) + while re_['empty_tag'].search(body): + body = re_['empty_tag'].sub(r'\3', body) + body = re_['whitespace_near_tag'].sub(r'<\1>', body) + body = re_['whitespace'].sub(' ', body) + body = re_['whitespace_tag_tag'].sub(whitespace_tag_tag, body) + return body.strip().replace('\u00ad', '') # soft hyphen + + +def get_html_redirect(html: str) -> Optional[str]: + """ + Return an html redirect in an http-equiv meta tag. + + If none is found, return None. + """ + redir_url = None + http_equivs = re_['http_equiv'].findall(html) + for raw in http_equivs: + tag = BeautifulSoup(raw, 'html.parser').meta + if tag and tag.get('http-equiv', '').lower() == 'refresh': + if content := tag.get('content'): + try: + _, redir_url = content.split(';') + redir_url = ( + redir_url.strip() + .removeprefix('url=') + .removeprefix('URL=') + .strip("'") + ) + except: + pass + return redir_url diff --git a/src/atextcrawler/utils/http.py b/src/atextcrawler/utils/http.py new file mode 100644 index 0000000..a6bf4c0 --- /dev/null +++ b/src/atextcrawler/utils/http.py @@ -0,0 +1,58 @@ +""" +Utility functions related to http. +""" + +import re +from typing import Optional + +from multidict import CIMultiDictProxy + +from ..models import Site +from .durl import Durl + +re_ = { + 'link_header': re.compile(',\s*(?=<)'), + 'rel_canonical': re.compile(';\s*rel\s*=\s*["\']?canonical', re.I), + 'rel_shortlink': re.compile(';\s*rel\s*=\s*["\']?shortlink', re.I), +} + + +async def get_header_links( + headers: CIMultiDictProxy, + durl: Durl, + site: Optional[Site], +) -> dict[str, Optional[str]]: + """ + Extract canonical and shortlink links from http headers. + + *durl* must be the Durl of the fetched page and *site* - i fnon None - + must be the Site to which the page belongs. + + Return a (default)dict with 'canonical' and 'shortlink' as keys. + The values default to None. + """ + res = {} + canonical = shortlink = None + if 'link' in headers and (link_headers := headers.getall('link')): + links = [] + for link_header in link_headers: + links += re_['link_header'].split(link_header) + url = durl.url() + base_url = site.base_url if site else url + base_durl = await Durl(base_url) if base_url else None + for link in links: + if not canonical and 'canonical' in link.lower(): + if re_['rel_canonical'].search(link): + canon_url = link.strip().lstrip('<').split('>')[0] + if canon_durl := await Durl(canon_url, base=base_durl): + canonical = canon_durl.url() + if not shortlink and 'shortlink' in link.lower(): + if re_['rel_shortlink'].search(link): + short_url = link.strip().lstrip('<').split('>')[0] + if short_durl := await Durl(short_url, base=base_durl): + shortlink = short_durl.url() + if canonical and shortlink: + break + res['canonical'] = canonical + res['shortlink'] = shortlink + return res diff --git a/src/atextcrawler/utils/json.py b/src/atextcrawler/utils/json.py new file mode 100644 index 0000000..874419f --- /dev/null +++ b/src/atextcrawler/utils/json.py @@ -0,0 +1,32 @@ +""" +Custom JSON encoder. +""" + +import json + + +class JSONEncoderExt(json.JSONEncoder): + """ + Extended JSON encoder with encoding of sets as lists. + """ + + def default(self, obj): + """ + Encode sets as lists and everything else as by default. 
+ """ + if isinstance(obj, set): + return list(obj) + return json.JSONEncoder.default(self, obj) + + +def json_dumps(obj): + """ + Encode an object to a JSON string using JSONEncoderExt. + """ + return json.dumps(obj, cls=JSONEncoderExt) + + +json_loads = json.loads +""" +Decoding of JSON strings as by default. +""" diff --git a/src/atextcrawler/utils/lang.py b/src/atextcrawler/utils/lang.py new file mode 100644 index 0000000..72357c6 --- /dev/null +++ b/src/atextcrawler/utils/lang.py @@ -0,0 +1,44 @@ +""" +Utility functions related to languages. +""" + +from pathlib import Path +from typing import Optional + +import gcld3 + +asset_path = Path(__file__).parent.parent / 'assets' + + +with open(asset_path / 'iso_639-1', 'r') as f: + iso_639_1_codes = f.read().strip().split('\n') + + +lang_detector = gcld3.NNetLanguageIdentifier( + min_num_bytes=0, max_num_bytes=1000 +) + + +def clean_lang(lang: Optional[str]) -> Optional[str]: + """ + Clean a language code string: it must be an ISO 639-1 code or None. + """ + if lang is None: + return None + lang = lang[:2].lower() + if lang in iso_639_1_codes: + return lang + return None + + +def extract_content_language(text: str) -> Optional[str]: + """ + Extract the language from a text. + """ + if len(text) < 10: + return None + lang = None + lang_det = lang_detector.FindLanguage(text=text) + if lang_det.is_reliable: + lang = lang_det.language[:2] + return lang diff --git a/src/atextcrawler/utils/link.py b/src/atextcrawler/utils/link.py new file mode 100644 index 0000000..e3875e7 --- /dev/null +++ b/src/atextcrawler/utils/link.py @@ -0,0 +1,116 @@ +""" +Hyperlinks (a href, link). +""" + +from pathlib import Path +from typing import Optional + +import tldextract + +nofollow_link_rels = set( + [ + 'nofollow', + 'search', + 'noreferrer', + 'noopener', + 'help', + 'license', + ] +) +""" +Do not follow the hrefs in anchor tags with these values of the rel attribute. +""" + + +meta_names = ( + 'generator', + 'lang', + 'language', + 'description', + 'keywords', + 'author', + 'title', + 'subject', + 'revised', + 'abstract', + 'topic', + 'summary', + 'classfication', + 'category', + 'reply-to', + 'owner', + 'url', + 'identifier-URL', + 'geo.position', + 'geo.region', + 'geo.placename', + 'dc.language', +) +""" +Values of the name attribute of meta tags to keep. + +See also: https://gist.github.com/lancejpollard/1978404 +See also: https://github.com/joshbuchea/HEAD +""" + + +meta_props = ( + 'og:site_name', + 'og:locale', + 'og:type', + 'og:latitude', + 'og:longitude', + 'og:street', + 'og:locality', + 'og:region', + 'og:postal', + 'og:country', +) +""" +Values of the property attribute of meta tags to keep. +""" + + +link_rels = set( + [ + 'webmention', + 'pingback', + 'alternate', + 'canonical', + 'author', + ] +) +""" +Values of the rel attribute of link tags to keep. +""" + + +def load_blacklist(): + """ + Return the 10000 most popular internet domains. + """ + path = Path(__file__).parent.parent / 'assets' / 'top_1e4' + with open(path, 'r') as file: + domains = file.read().strip().splitlines() + return domains + + +domain_blacklist = load_blacklist() + + +def in_blacklist(hostname: str) -> Optional[str]: + """ + Return a match of host in the blacklist, or None. + """ + domain = extract_domain(hostname) + if domain in domain_blacklist: + return hostname + return None + + +def extract_domain(hostname: str) -> str: + """ + Extract the lower-case domain from a hostname. 
diff --git a/src/atextcrawler/utils/lang.py b/src/atextcrawler/utils/lang.py
new file mode 100644
index 0000000..72357c6
--- /dev/null
+++ b/src/atextcrawler/utils/lang.py
@@ -0,0 +1,44 @@
+"""
+Utility functions related to languages.
+"""
+
+from pathlib import Path
+from typing import Optional
+
+import gcld3
+
+asset_path = Path(__file__).parent.parent / 'assets'
+
+
+with open(asset_path / 'iso_639-1', 'r') as f:
+    iso_639_1_codes = f.read().strip().split('\n')
+
+
+lang_detector = gcld3.NNetLanguageIdentifier(
+    min_num_bytes=0, max_num_bytes=1000
+)
+
+
+def clean_lang(lang: Optional[str]) -> Optional[str]:
+    """
+    Normalize a language code string, returning an ISO 639-1 code or None.
+    """
+    if lang is None:
+        return None
+    lang = lang[:2].lower()
+    if lang in iso_639_1_codes:
+        return lang
+    return None
+
+
+def extract_content_language(text: str) -> Optional[str]:
+    """
+    Detect the language of a text, returning an ISO 639-1 code or None.
+
+    Texts shorter than 10 characters are not classified.
+    """
+    if len(text) < 10:
+        return None
+    lang = None
+    lang_det = lang_detector.FindLanguage(text=text)
+    if lang_det.is_reliable:
+        lang = lang_det.language[:2]
+    return lang
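Illustrative behavior, assuming 'en' is contained in the bundled iso_639-1 asset and 'xx' is not:

>>> from atextcrawler.utils.lang import clean_lang
>>> clean_lang('EN-us')
'en'
>>> clean_lang('xx') is None
True
>>> clean_lang(None) is None
True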
diff --git a/src/atextcrawler/utils/link.py b/src/atextcrawler/utils/link.py
new file mode 100644
index 0000000..e3875e7
--- /dev/null
+++ b/src/atextcrawler/utils/link.py
@@ -0,0 +1,116 @@
+"""
+Hyperlinks (a href, link).
+"""
+
+from pathlib import Path
+from typing import Optional
+
+import tldextract
+
+nofollow_link_rels = set(
+    [
+        'nofollow',
+        'search',
+        'noreferrer',
+        'noopener',
+        'help',
+        'license',
+    ]
+)
+"""
+Do not follow the hrefs in anchor tags with these values of the rel attribute.
+"""
+
+
+meta_names = (
+    'generator',
+    'lang',
+    'language',
+    'description',
+    'keywords',
+    'author',
+    'title',
+    'subject',
+    'revised',
+    'abstract',
+    'topic',
+    'summary',
+    'classification',
+    'category',
+    'reply-to',
+    'owner',
+    'url',
+    'identifier-URL',
+    'geo.position',
+    'geo.region',
+    'geo.placename',
+    'dc.language',
+)
+"""
+Values of the name attribute of meta tags to keep.
+
+See also: https://gist.github.com/lancejpollard/1978404
+See also: https://github.com/joshbuchea/HEAD
+"""
+
+
+meta_props = (
+    'og:site_name',
+    'og:locale',
+    'og:type',
+    'og:latitude',
+    'og:longitude',
+    'og:street',
+    'og:locality',
+    'og:region',
+    'og:postal',
+    'og:country',
+)
+"""
+Values of the property attribute of meta tags to keep.
+"""
+
+
+link_rels = set(
+    [
+        'webmention',
+        'pingback',
+        'alternate',
+        'canonical',
+        'author',
+    ]
+)
+"""
+Values of the rel attribute of link tags to keep.
+"""
+
+
+def load_blacklist():
+    """
+    Return the 10000 most popular internet domains.
+    """
+    path = Path(__file__).parent.parent / 'assets' / 'top_1e4'
+    with open(path, 'r') as file:
+        domains = file.read().strip().splitlines()
+    return domains
+
+
+domain_blacklist = load_blacklist()
+
+
+def in_blacklist(hostname: str) -> Optional[str]:
+    """
+    Return the hostname if its domain is in the blacklist, else None.
+    """
+    domain = extract_domain(hostname)
+    if domain in domain_blacklist:
+        return hostname
+    return None
+
+
+def extract_domain(hostname: str) -> str:
+    """
+    Extract the lower-case registrable domain from a hostname.
+    """
+    levels = tldextract.extract(hostname)
+    return '.'.join(levels[-2:]).lower()
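A short sketch of both helpers; the second result assumes google.com appears in the bundled top_1e4 list (and note that tldextract may fetch the public suffix list on first use):

>>> from atextcrawler.utils.link import extract_domain, in_blacklist
>>> extract_domain('www.Example.co.uk')
'example.co.uk'
>>> in_blacklist('mail.google.com')
'mail.google.com'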
diff --git a/src/atextcrawler/utils/muse.py b/src/atextcrawler/utils/muse.py
new file mode 100644
index 0000000..467122e
--- /dev/null
+++ b/src/atextcrawler/utils/muse.py
@@ -0,0 +1,120 @@
+"""
+Parse muse-formatted plaintext (delivered by amusewiki).
+"""
+
+import re
+from datetime import datetime
+from typing import Optional
+
+from .date_finder import extract_latest_date
+from .lang import clean_lang
+
+re_tag = re.compile(r'<[^<]+?>')
+
+
+def parse_muse(text: str) -> Optional[tuple[dict, str]]:
+    """
+    Parse a MUSE string, returning meta information and the text body.
+    """
+    head, body = split_head_body(text)
+    if not head:
+        return None
+    meta = parse_head(head)
+    if not meta:
+        return None
+    return extract_muse_meta(meta, body), body
+
+
+def split_head_body(text: str) -> tuple[str, str]:
+    """
+    Split a MUSE string into head and body and return both.
+    """
+    head = ''
+    while text.startswith('#'):
+        line_end = (text.find('\n') + 1) or len(text)
+        head += text[:line_end]
+        text = text[line_end:]
+    return head.strip(), text.strip()
+
+
+def parse_head(text: str) -> dict:
+    """
+    Parse a MUSE head and return a dict mapping field names to values.
+    """
+    fields = {}
+    for line in text.split('\n'):
+        parts = line.strip().split(' ', 1)
+        if len(parts) == 2:
+            fields[parts[0][1:]] = parts[1]
+    return fields
+
+
+amusewiki_fields = [
+    'author',
+    'title',
+    'lang',
+    'LISTtitle',  # reduced title for alphabetical sorting
+    'subtitle',
+    'SORTauthors',  # authors separated by ';' or ',' (only for indexing)
+    'SORTtopics',  # topics separated by ';' or ',' (only for indexing)
+    'date',  # publication year
+    'pubdate',  # publication datetime
+    'notes',  # additional info (orig title, translators, credits, ...)
+    'source',  # preferred format: "Retrieved on March 8, 2012 from {URL}"
+    'publisher',
+    'isbn',
+    #'rights',
+    'seriesname',
+    'seriesnumber',
+    #'hyphenation',  # irrelevant
+    #'slides',  # irrelevant
+    #'DELETED',  # irrelevant
+    #'cover',  # irrelevant
+    #'coverwidth',  # irrelevant
+    #'nocoverpage',  # irrelevant
+    #'notoc',  # irrelevant
+    #'nofinalpage',  # irrelevant
+    #'impressum',  # irrelevant
+    #'continuefootnotes',  # irrelevant
+    #'centerchapter',  # irrelevant
+    #'centersection',  # irrelevant
+]
+"""
+Fields defined by amusewiki (cf. https://amusewiki.org/library/manual).
+"""
+
+
+re_list = re.compile('[;,]')
+
+
+def extract_muse_meta(meta, body) -> dict:
+    """
+    Extract meta information from the muse header and muse body.
+    """
+    authors = set()
+    if author := meta.get('author', '').strip():
+        authors.add(author)
+    if sortauthors := meta.get('SORTauthors', '').strip():
+        for author in re_list.split(sortauthors):
+            if author_ := author.strip():
+                authors.add(author_)
+    pubdate = meta.get('pubdate', '').strip()
+    pub_date: Optional[datetime] = None
+    if pubdate:
+        try:
+            pub_date = datetime.fromisoformat(pubdate)
+        except ValueError:
+            pub_date = extract_latest_date(pubdate)
+    summary = re_tag.sub('', body[:1000].split('\n\n')[0])
+    return {
+        'title': re_tag.sub('', meta.get('title', '')) or None,
+        'authors': authors,
+        'lang': clean_lang(meta.get('lang')),
+        'keywords': [
+            s.strip()
+            for s in re_list.split(meta.get('SORTtopics', '').strip())
+            if s.strip()
+        ],
+        'pub_date': pub_date,
+        'summary': summary,
+        'orig_source': meta.get('source', '').strip() or None,
+    }
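A minimal sketch of the head/body splitting and head parsing on a made-up MUSE snippet:

>>> from atextcrawler.utils.muse import split_head_body, parse_head
>>> text = '#title Walden\n#lang en\n\nBody text.'
>>> head, body = split_head_body(text)
>>> parse_head(head)
{'title': 'Walden', 'lang': 'en'}
>>> body
'Body text.'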
diff --git a/src/atextcrawler/utils/probe.py b/src/atextcrawler/utils/probe.py
new file mode 100644
index 0000000..0987433
--- /dev/null
+++ b/src/atextcrawler/utils/probe.py
@@ -0,0 +1,22 @@
+"""
+Utility functions for probing / sampling.
+"""
+
+
+def extract_samples(items, n=5):
+    """
+    Extract up to n sample elements from the given dict or list.
+
+    If *items* is a dict, sample from its keys.
+    """
+    length = len(items)
+    if length <= n:
+        return items
+    poss = []
+    step = (length + 1) / n
+    for i in range(n):
+        pos = int(step * i)
+        if pos < length and (not poss or pos > poss[-1]):
+            poss.append(pos)
+    items_list = list(items)
+    return [items_list[pos] for pos in poss]
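For illustration, extract_samples picks roughly evenly spaced elements and returns short inputs unchanged:

>>> from atextcrawler.utils.probe import extract_samples
>>> extract_samples(list(range(12)))
[0, 2, 5, 7, 10]
>>> extract_samples(['a', 'b', 'c'])
['a', 'b', 'c']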
diff --git a/src/atextcrawler/utils/section.py b/src/atextcrawler/utils/section.py
new file mode 100644
index 0000000..302f60f
--- /dev/null
+++ b/src/atextcrawler/utils/section.py
@@ -0,0 +1,74 @@
+"""
+Operations on text sections.
+
+Semantic breaks are character positions within a text (0-offset)
+where a new section begins. More precisely, the character at such
+a position is a space, and a semantically breaking tag (e.g., an h1
+or a br) begins at the next position.
+
+Each semantic break has a level, which expresses its breaking
+strength. The lower the level (e.g., h1 has a lower level than h2),
+the stronger the break.
+
+Implicitly, if position 0 has no semantic break, a semantic break
+at position 0 with level 80 is added.
+
+Semantic breaks can be used to split a text into sections.
+The lower the maximum level of the semantic breaks taken into account,
+the coarser the segmentation and the fewer the sections.
+Each section is given the level of the semantic break at its beginning.
+
+From another point of view, sections have levels indicating
+the segmentation depth.
+
+The levels for html tags are defined in tag.py.
+
+The *semantic_breaks* argument in the functions below
+is a dict mapping the character position of a semantic break
+to the level of the section beginning at that position
+(if segmentation is done at this or a higher level).
+"""
+
+
+def iter_sections(text, semantic_breaks, max_level=59):
+    """
+    Iterate over sections, splitting only at breaks up to *max_level*.
+
+    Yield (start_pos, end_pos, level, text).
+    *text* is assumed to have the first semantic break at position 0.
+    """
+    n = len(text)
+    last_pos = 0
+    last_level = semantic_breaks.get(0, 80)
+    for pos, level in sorted(semantic_breaks.items()):
+        if level <= max_level and last_pos != pos:
+            yield last_pos, pos, last_level, text[last_pos + 1 : pos]
+            last_pos = pos
+            last_level = level
+    if last_pos < n:
+        yield last_pos, n, last_level, text[last_pos:]
+
+
+def concat_section_texts(text, semantic_breaks, min_len=2000):
+    """
+    Concatenate consecutive sections into chunks with a minimum length.
+
+    Yield (section_ids, combined_text).
+    """
+    n = len(text)
+    last_pos = 0
+    section_ids = []
+    for section_id, pos in enumerate(sorted(semantic_breaks.keys())):
+        if pos >= last_pos + min_len:
+            if n - pos < min_len:
+                for id_ in [
+                    i
+                    for i, k in enumerate(sorted(semantic_breaks.keys()))
+                    if k >= pos
+                ]:
+                    section_ids.append(id_)
+                pos = n
+            yield section_ids, text[last_pos:pos]
+            last_pos = pos
+            section_ids = []
+        section_ids.append(section_id)
+    if last_pos < n:
+        yield section_ids, text[last_pos:]
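A small worked example: with breaks at positions 0 (level 30) and 8 (level 60), segmenting at max_level=60 yields two sections; note that a section's text starts one character behind its break position (the space), except for the trailing section:

>>> from atextcrawler.utils.section import iter_sections
>>> text = ' Heading text of the section'
>>> list(iter_sections(text, {0: 30, 8: 60}, max_level=60))
[(0, 8, 30, 'Heading'), (8, 28, 60, ' text of the section')]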
diff --git a/src/atextcrawler/utils/similarity.py b/src/atextcrawler/utils/similarity.py
new file mode 100644
index 0000000..b739056
--- /dev/null
+++ b/src/atextcrawler/utils/similarity.py
@@ -0,0 +1,92 @@
+"""
+Text similarity with simhash.
+"""
+
+import logging
+
+from asyncpg import Connection
+from simhash import Simhash, SimhashIndex
+
+logger = logging.getLogger(__name__)
+logger.setLevel(logging.ERROR)
+
+
+postgresql_bigint_offset = 9223372036854775808
+"""
+Subtract this offset to map an unsigned 64-bit integer into PostgreSQL's
+bigint range.
+"""
+
+
+def get_features(txt: str) -> list[str]:
+    """
+    Extract features from a string for use with Simhash.
+    """
+    width = 3
+    txt = txt.replace(' ', '').lower()
+    return [txt[i : i + width] for i in range(max(len(txt) - width + 1, 1))]
+
+
+def simhash_to_bigint(simhash: Simhash) -> int:
+    """
+    Convert a simhash to PostgreSQL's bigint value range.
+    """
+    return simhash.value - postgresql_bigint_offset
+
+
+def simhash_from_bigint(bigint: int) -> Simhash:
+    """
+    Convert a simhash from PostgreSQL's bigint to a Simhash instance.
+    """
+    return Simhash(bigint + postgresql_bigint_offset, log=logger)
+
+
+def get_simhash(text: str) -> Simhash:
+    """
+    Return the Simhash of the given text.
+    """
+    return Simhash(get_features(text), log=logger)
+
+
+async def get_simhash_index(conn: Connection, site_id: int) -> SimhashIndex:
+    """
+    Return a simhash index with hashes of all stored resources of the site.
+    """
+    sql = (
+        "SELECT r.id, r.simhash FROM site_path sp, resource r"
+        " WHERE sp.site_id=$1 AND sp.resource_id=r.id"
+    )
+    rows = await conn.fetch(sql, site_id)
+    objs = [
+        (
+            str(row['id']),
+            Simhash(row['simhash'] + postgresql_bigint_offset, log=logger),
+        )
+        for row in rows
+    ]
+    return SimhashIndex(objs, k=3, log=logger)
+
+
+def create_simhash(
+    index: SimhashIndex,
+    resource_id: int,
+    simhash_instance: Simhash,
+) -> int:
+    """
+    Add a resource with given id and simhash to a simhash index.
+
+    Return the simhash value shifted into PostgreSQL's bigint range.
+
+    (The simhash field of the resource's database entry is not updated.)
+    """
+    index.add(str(resource_id), simhash_instance)
+    return simhash_to_bigint(simhash_instance)
+
+
+def search_simhash(index: SimhashIndex, simhash_inst: Simhash) -> list[int]:
+    """
+    Return the ids of similar resources from the index.
+    """
+    found = index.get_near_dups(simhash_inst)
+    if found:
+        return sorted([int(elem) for elem in found])
+    return []
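Since the bigint mapping is a plain offset shift, it round-trips; a sketch:

>>> from atextcrawler.utils.similarity import (
...     get_simhash,
...     simhash_from_bigint,
...     simhash_to_bigint,
... )
>>> simhash = get_simhash('Lorem ipsum dolor sit amet')
>>> bigint = simhash_to_bigint(simhash)
>>> -2**63 <= bigint < 2**63
True
>>> simhash_from_bigint(bigint).value == simhash.value
True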
diff --git a/src/atextcrawler/utils/tag.py b/src/atextcrawler/utils/tag.py
new file mode 100644
index 0000000..d636928
--- /dev/null
+++ b/src/atextcrawler/utils/tag.py
@@ -0,0 +1,189 @@
+"""
+Information collections related to html tags.
+"""
+
+
+drop_tags = [
+    'applet',
+    'area',
+    'audio',
+    'base',
+    'basefont',
+    'bdi',
+    'bdo',
+    'button',
+    'canvas',
+    'code',
+    'command',
+    'data',
+    'datalist',
+    'dir',
+    'embed',
+    'fieldset',
+    'figure',
+    'form',
+    'frame',
+    'frameset',
+    'iframe',
+    'img',
+    'input',
+    'label',
+    'legend',
+    'map',
+    'menuitem',
+    'meter',
+    'noframes',
+    'noscript',
+    'object',
+    'optgroup',
+    'option',
+    'param',
+    'picture',
+    'progress',
+    'rp',
+    'rt',
+    'ruby',
+    'samp',
+    'script',
+    'select',
+    'source',
+    'style',
+    'svg',
+    'template',
+    'textarea',
+    'track',
+    'var',
+    'video',
+]
+"""
+Tags to drop, including their content.
+"""
+
+
+keep_tags = {
+    'a': (0, 0, ''),
+    'abbr': (0, 0, 'st'),
+    'acronym': (0, 0, 'st'),
+    'address': (1, 0, 'm'),
+    'article': (1, 15, ''),
+    'aside': (1, 0, 'd'),
+    'b': (0, 0, 'st'),
+    'blockquote': (1, 65, 'q'),
+    'br': (1, 80, ''),
+    'caption': (1, 68, ''),
+    'center': (1, 50, ''),
+    'cite': (1, 0, 'd'),
+    'col': (1, 75, ''),
+    'colgroup': (1, 73, ''),
+    'dd': (1, 70, 'li'),
+    'del': (0, 0, 'se'),
+    'details': (1, 0, 'd'),
+    'dfn': (0, 0, 'st'),
+    'div': (1, 60, ''),  # lvl often revised to min of contained tags
+    'dl': (1, 70, 'l'),
+    'dt': (1, 70, 'li'),
+    'em': (0, 0, 'st'),
+    'figcaption': (1, 0, ''),
+    'font': (0, 0, 's'),
+    'footer': (1, 15, ''),
+    'h1': (1, 30, ''),
+    'h2': (1, 32, ''),
+    'h3': (1, 34, ''),
+    'h4': (1, 36, ''),
+    'h5': (1, 38, ''),
+    'h6': (1, 40, ''),
+    'header': (1, 15, ''),
+    'hr': (1, 30, ''),
+    'i': (0, 0, 'st'),
+    'ins': (0, 0, 'se'),
+    'li': (1, 75, 'li'),  # lvl revised if not inside p
+    'main': (1, 10, ''),
+    'mark': (0, 0, 's'),
+    'nav': (1, 0, ''),  # keep for footnotes
+    'ol': (1, 70, 'l'),  # lvl revised if not inside p
+    'p': (1, 60, ''),
+    'pre': (1, 65, 'q'),
+    'q': (1, 0, 'q'),
+    's': (0, 0, ''),
+    'section': (1, 24, ''),
+    'small': (0, 0, 'd'),
+    'span': (0, 0, 's'),
+    'strike': (0, 0, 'se'),
+    'strong': (0, 0, 'st'),
+    'sub': (0, 0, ''),
+    'summary': (1, 20, 'm'),
+    'sup': (0, 0, ''),
+    'table': (1, 65, ''),
+    'tbody': (1, 70, ''),
+    'td': (1, 78, ''),
+    'tfoot': (1, 70, ''),
+    'th': (1, 75, ''),
+    'thead': (1, 70, ''),
+    'time': (0, 0, 'm'),
+    'tr': (1, 75, ''),
+    'u': (0, 0, 's'),
+    'ul': (1, 70, 'l'),  # lvl revised if not inside p
+}
+"""
+Tags to keep for annotation, and their properties.
+
+The properties are:
+
+  * sep: whether to separate text at both sides of the tag with a space
+  * lvl: structural depth level of content of this tag;
+    the paragraph level is 60; headings are below 60, listings above;
+    a div below the tag will usually have the tag's depth + 1
+  * sem: semantic categories: zero or more of
+      * s=span
+      * l=listing
+      * i=list_item
+      * t=term
+      * e=edit
+      * d=details
+      * q=quote
+      * m=meta
+      * x=exclude
+    (the letters combine, e.g. 'st' means span + term)
+"""
+
+
+self_closing_tags = ('br', 'hr')
+"""
+Those among keep_tags which are self-closing.
+"""
+
+
+all_self_closing_tags = (
+    'area',
+    'base',
+    'br',
+    'col',
+    'embed',
+    'hr',
+    'img',
+    'input',
+    'link',
+    'meta',
+    'param',
+    'source',
+    'track',
+    'wbr',
+)
+"""
+All self-closing tags of the html standard.
+"""
+
+
+drop_roles = (
+    'banner',
+    'complementary',
+    'contentinfo',
+    'dialog',
+    'figure',
+    'form',
+    'img',
+    'search',
+    'switch',
+)
+"""
+Drop tags with these aria roles.
+"""
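Consumers unpack the keep_tags values as (sep, lvl, sem) triples, e.g.:

>>> from atextcrawler.utils.tag import drop_tags, keep_tags
>>> keep_tags['blockquote']
(1, 65, 'q')
>>> 'script' in drop_tags
True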
diff --git a/tests/__init__.py b/tests/__init__.py
new file mode 100644
index 0000000..b5f7e34
--- /dev/null
+++ b/tests/__init__.py
@@ -0,0 +1,7 @@
+from .annotation import AnnotateTest
+from .date_finder import DateFinderTest
+from .durl import DurlTest
+from .page import PageCleanTest
+from .section import IterSectionTest, AggSectionTest
+from .simhash import SimhashTest
+from .text import CleanHtmlTest
diff --git a/tests/annotation.py b/tests/annotation.py
new file mode 100644
index 0000000..f82c68d
--- /dev/null
+++ b/tests/annotation.py
@@ -0,0 +1,49 @@
+"""
+Test cases for text annotation.
+"""
+
+from unittest import TestCase
+
+from atextcrawler.utils.annotation import annotate
+
+
+class AnnotateTest(TestCase):
+    """
+    Test annotation.
+
+    Consider that the <br> and <hr> tags are self-closing.
+    """
+
+    def test_annotate_1(self):
+        s = 'Hello<br>world'
+        text, anns = annotate(s)
+        self.assertEqual(text, ' Hello world')
+        self.assertEqual(anns['semantic_breaks'], {0: 80, 6: 80})
+        self.assertEqual(anns['section_ids'], {})
+
+    def test_annotate_2(self):
+        s = ' Hello <br> world '
+        text, anns = annotate(s)
+        self.assertEqual(text, ' Hello world')
+        self.assertEqual(anns['semantic_breaks'], {0: 80, 6: 80})
+        self.assertEqual(anns['section_ids'], {})
+
+    def test_annotate_3(self):
+        s = '<div> <p> Hello world </p> </div>'
+        text, anns = annotate(s)
+        self.assertEqual(text, ' Hello world')
+        self.assertEqual(anns['semantic_breaks'], {0: 60})
+
+    def test_annotate_4(self):
+        s = '<div> <p id="ref1"> Hello world </p> </div>'
+        text, anns = annotate(s)
+        self.assertEqual(text, ' Hello world')
+        self.assertEqual(anns['semantic_breaks'], {0: 60})
+        self.assertEqual(anns['section_ids'], {0: ['ref1']})
+
+    def test_annotate_5(self):
+        s = '<br>Hello<div> <p id="ref2">world</p> </div>'
+        text, anns = annotate(s)
+        self.assertEqual(text, ' Hello world')
+        self.assertEqual(anns['semantic_breaks'], {0: 80, 6: 60})
+        self.assertEqual(anns['section_ids'], {1: ['ref2']})
diff --git a/tests/date_finder.py b/tests/date_finder.py
new file mode 100644
index 0000000..add02f1
--- /dev/null
+++ b/tests/date_finder.py
@@ -0,0 +1,20 @@
+from datetime import datetime
+from unittest import TestCase
+
+from atextcrawler.utils.date_finder import extract_latest_date
+
+
+class DateFinderTest(TestCase):
+    def test_extract_latest_date(self):
+        s = 'test 1987-2+1-no'
+        r = datetime(1987, 2, 1)
+        self.assertEqual(extract_latest_date(s), r)
+        s = '2020-04-06, whatever and 1987-2-1, 1/20/2021'
+        r = datetime(2020, 4, 6)
+        self.assertEqual(extract_latest_date(s, lang='de'), r)
+        s = 'test 2022-04-06, whatever and 1987-2-1, 1/20/2021'
+        r = datetime(2021, 1, 20)
+        self.assertEqual(extract_latest_date(s, lang='en'), r)
+        s = ''
+        r = None
+        self.assertEqual(extract_latest_date(s), r)
diff --git a/tests/durl.py b/tests/durl.py
new file mode 100644
index 0000000..f805557
--- /dev/null
+++ b/tests/durl.py
@@ -0,0 +1,68 @@
+from unittest import IsolatedAsyncioTestCase
+
+from atextcrawler.config import Config
+from atextcrawler.db import PGPool
+from atextcrawler.utils.durl import Durl
+
+
+class DurlTest(IsolatedAsyncioTestCase):
+    async def asyncSetUp(self):
+        config = Config().get()
+        self.pool = PGPool(config['postgresql'])
+        await self.pool.__aenter__()
+        self.conn = await self.pool.pool.acquire()
+
+    async def test_durl_basic(self):
+        durl1 = await Durl('https://U:Pw@www.EXAMPLE.com:8000/hello?world#a')
+        self.assertEqual(durl1.scheme, 'https')
+        self.assertEqual(durl1.netloc, 'U:Pw@www.example.com:8000')
+        self.assertEqual(durl1.port, 8000)
+        self.assertEqual(durl1.path, '/hello')
+        self.assertEqual(durl1.fragment, '')
+        self.assertEqual(durl1.pwa(), 'hello?world')
+        self.assertEqual(durl1.site(), 'https://U:Pw@www.example.com:8000/')
+        self.assertEqual(
+            durl1.url(), 'https://U:Pw@www.example.com:8000/hello?world'
+        )
+        self.assertEqual(durl1.has_path(), True)
+        durl2 = await Durl('http://www.example.com/')
+        self.assertEqual(durl2.has_path(), False)
+        durl3 = await Durl('ftp://www.example.com/')
+        self.assertEqual(durl3, None)
+
+    async def test_durl_with_base(self):
+        durl1 = await Durl('https://www.example.com')
+        self.assertEqual(durl1.path, '/')
+        self.assertEqual(durl1.pwa(), '')
+        self.assertEqual(durl1.has_path(), False)
+        durl2 = await Durl('https://www.example.com/hello2', base=durl1)
+        self.assertEqual(durl2.hostname, 'www.example.com')
+        self.assertEqual(durl2.path, '/hello2')
+        self.assertEqual(durl2.pwa(), 'hello2')
+        durl3 = await Durl('/hello3?x=1', base=durl1)
+        self.assertEqual(durl3.hostname, 'www.example.com')
+        self.assertEqual(durl3.path, '/hello3')
+        self.assertEqual(durl3.pwa(), 'hello3?x=1')
+        self.assertEqual(durl3.site(), 'https://www.example.com/')
+        durl4 = await Durl('https://www.kernel.org/', base=durl1)
+        self.assertEqual(durl4, None)
+
+    async def test_durl_with_base_and_match_base(self):
+        durl1 = await Durl('https://www.example.com/base/path/')
+        self.assertEqual(durl1.path, '/base/path/')
+        self.assertEqual(durl1.pwa(), 'base/path/')
+        self.assertEqual(durl1.has_path(), True)
+        durl2 = await Durl(
+            'https://www.example.com/base/', base=durl1, match_base=True
+        )
+        self.assertEqual(durl2, None)
+        durl3 = await Durl(
+            'https://www.example.com/base/path/whatever?x=1#a',
+            base=durl1,
+            match_base=True,
+        )
+        self.assertEqual(durl3.pwa(), 'whatever?x=1')
+
+    async def asyncTearDown(self):
+        await self.pool.pool.release(self.conn)
+        await self.pool.pool.close()
diff --git a/tests/page.py b/tests/page.py
new file mode 100644
index 0000000..9cb76bc
--- /dev/null
+++ b/tests/page.py
@@ -0,0 +1,24 @@
+"""
+Test cases for resource type page.
+"""
+
+from unittest import TestCase
+
+from atextcrawler.utils.html import clean_body
+
+# from atextcrawler.utils.tag import drop_tags
+
+
+class PageCleanTest(TestCase):
+    def test_clean_body_1(self):
+        s = ' Hello <span> </span> world '
+        r = 'Hello world'
+        self.assertEqual(clean_body(s), r)
+
+
+#    def test_drop_tags(self):
+#        s = '<script>something<style>else</style>...</script>'
+#        r = drop_tags(s)
+#        self.assertEqual(r, '')
+#        s = '<script>something</script>'
+#        r = drop_tags(s)
+#        self.assertEqual(r, '')
diff --git a/tests/section.py b/tests/section.py
new file mode 100644
index 0000000..be47a8b
--- /dev/null
+++ b/tests/section.py
@@ -0,0 +1,105 @@
+from unittest import TestCase
+
+from atextcrawler.utils.section import concat_section_texts, iter_sections
+
+
+class IterSectionTest(TestCase):
+    def test_iter_sections_1(self):
+        s = 'abcdefghijklmnopqrstuvwxyz'
+        sb = {0: 80, 5: 2, 15: 1, 20: 3}
+        sections1 = list(iter_sections(s, sb, max_level=100))
+        sections2 = [
+            (0, 5, 80, 'bcde'),
+            (5, 15, 2, 'ghijklmno'),
+            (15, 20, 1, 'qrst'),
+            (20, 26, 3, 'uvwxyz'),
+        ]
+        self.assertEqual(sections1, sections2)
+
+    def test_iter_sections_2(self):
+        s = 'abcdefghijklmnopqrstuvwxyz'
+        sb = {0: 4, 5: 2, 15: 1, 20: 3, 26: 9}
+        sections1 = list(iter_sections(s, sb, max_level=100))
+        sections2 = [
+            (0, 5, 4, 'bcde'),
+            (5, 15, 2, 'ghijklmno'),
+            (15, 20, 1, 'qrst'),
+            (20, 26, 3, 'vwxyz'),
+        ]
+        self.assertEqual(sections1, sections2)
+
+    def test_iter_sections_3(self):
+        s = 'abcdefghijklmnopqrstuvwxyz'
+        sb = {5: 2, 15: 60, 18: 50, 20: 3}
+        sections1 = list(iter_sections(s, sb, max_level=59))
+        sections2 = [
+            (0, 5, 80, 'bcde'),
+            (5, 18, 2, 'ghijklmnopqr'),
+            (18, 20, 50, 't'),
+            (20, 26, 3, 'uvwxyz'),
+        ]
+        self.assertEqual(sections1, sections2)
+
+    def test_iter_sections_4(self):
+        s = 'abcdefghijklmnopqrstuvwxyz'
+        sb = {5: 2, 15: 60, 18: 50, 20: 3, 24: 60}
+        sections1 = list(iter_sections(s, sb, max_level=59))
+        sections2 = [
+            (0, 5, 80, 'bcde'),
+            (5, 18, 2, 'ghijklmnopqr'),
+            (18, 20, 50, 't'),
+            (20, 26, 3, 'uvwxyz'),
+        ]
+        self.assertEqual(sections1, sections2)
+
+
+class AggSectionTest(TestCase):
+    def test_concat_sections_1(self):
+        s = 'abcdefghijklmnopqrstuvwxyz'
+        sb = {0: 1, 5: 1, 15: 1, 20: 1}
+        sections1 = list(concat_section_texts(s, sb, min_len=10))
+        sections2 = [
+            ([0, 1], 'abcdefghijklmno'),
+            ([2, 3], 'pqrstuvwxyz'),
+        ]
+        self.assertEqual(sections1, sections2)
+
+    def test_concat_sections_2(self):
+        s = 'abcdefghijklmnopqrstuvwxyz'
+        sb = {0: 1, 2: 1, 10: 1, 20: 1, 26: 1}
+        sections1 = list(concat_section_texts(s, sb, min_len=10))
+        sections2 = [
+            ([0, 1], 'abcdefghij'),
+            ([2, 3, 4], 'klmnopqrstuvwxyz'),
+        ]
+        self.assertEqual(sections1, sections2)
+
+    def test_concat_sections_3(self):
+        s = 'abcdefghijklmnopqrstuvwxyz'
+        sb = {0: 1, 4: 1, 6: 1, 16: 1, 26: 1}
+        sections1 = list(concat_section_texts(s, sb, min_len=10))
+        sections2 = [
+            ([0, 1, 2], 'abcdefghijklmnop'),
+            ([3, 4], 'qrstuvwxyz'),
+        ]
+        self.assertEqual(sections1, sections2)
+
+    def test_concat_sections_4(self):
+        s = 'abcdefghijklmnopqrstuvwxyz'
+        sb = {0: 1, 5: 1, 15: 1, 26: 1}
+        sections1 = list(concat_section_texts(s, sb, min_len=10))
+        sections2 = [
+            ([0, 1], 'abcdefghijklmno'),
+            ([2, 3], 'pqrstuvwxyz'),
+        ]
+        self.assertEqual(sections1, sections2)
+
+    def test_concat_sections_5(self):
+        s = 'abcdefghijklmnopqrstuvwxyz'
+        sb = {0: 1, 5: 1, 12: 1, 22: 1, 23: 1, 24: 1, 26: 1}
+        sections1 = list(concat_section_texts(s, sb, min_len=10))
+        sections2 = [
+            ([0, 1], 'abcdefghijkl'),
+            ([2, 3, 4, 5, 6], 'mnopqrstuvwxyz'),
+        ]
+        self.assertEqual(sections1, sections2)
diff --git a/tests/simhash.py b/tests/simhash.py
new file mode 100644
index 0000000..095d244
--- /dev/null
+++ b/tests/simhash.py
@@ -0,0 +1,54 @@
+"""
+Test cases for the similarity util (simhash).
+"""
+
+from unittest import TestCase
+
+from simhash import Simhash, SimhashIndex
+
+from atextcrawler.utils.similarity import (
+    create_simhash,
+    get_features,
+    get_simhash,
+    postgresql_bigint_offset,
+    search_simhash,
+)
+
+
+class SimhashTest(TestCase):
+    """
+    Test simhash creation and search.
+    """
+
+    def test_search(self):
+        n1 = int('1111111100000000', 2)
+        n2 = int('1111111100000111', 2)
+        n3 = int('1000000000000000', 2)
+        n4 = int('1000000000000111', 2)
+        n5 = int('1000001111000000', 2)
+        objs = [
+            ('1', Simhash(n1)),
+            ('3', Simhash(n3)),
+            ('4', Simhash(n4)),
+        ]
+        index = SimhashIndex(objs, k=3)
+        found = search_simhash(index, Simhash(n5))
+        self.assertEqual(found, [])
+        found = search_simhash(index, Simhash(n1))
+        self.assertEqual(found, [1])
+        found = search_simhash(index, Simhash(n2))
+        self.assertEqual(found, [1])
+        found = search_simhash(index, Simhash(n4))
+        self.assertEqual(found, [3, 4])
+
+    def test_create(self):
+        index = SimhashIndex([], k=3)
+        hash_val_1 = create_simhash(index, 101, get_simhash('hello ' * 20))
+        hash_val_2 = create_simhash(index, 102, get_simhash('another one'))
+        simhash_1 = Simhash(hash_val_1 + postgresql_bigint_offset)
+        simhash_2 = Simhash(hash_val_2 + postgresql_bigint_offset)
+        found = search_simhash(index, simhash_1)
+        self.assertEqual(found, [101])
+        found = search_simhash(index, simhash_2)
+        self.assertEqual(found, [102])
+        simhash_3 = get_simhash('hello ' * 20 + 'X')
+        found = search_simhash(index, simhash_3)
+        self.assertEqual(found, [101])
diff --git a/tests/text.py b/tests/text.py
new file mode 100644
index 0000000..2460459
--- /dev/null
+++ b/tests/text.py
@@ -0,0 +1,65 @@
+"""
+Test cases for text util.
+"""
+
+from unittest import TestCase
+
+from atextcrawler.utils.html import clean_page
+
+
+class CleanHtmlTest(TestCase):
+    """
+    Test clean_page.
+
+    Have an eye on self-closing tags (br, hr, ...).
+    """
+
+    def test_clean_page_1(self):
+        s = 'Hello<br>anything'
+        r = 'Hello<br/>anything'
+        self.assertEqual(str(clean_page(s)), r)
+
+    def test_clean_page_2(self):
+        s = 'Hello<hr>anything'
+        r = 'Hello<hr/>anything'
+        self.assertEqual(str(clean_page(s)), r)
+
+    def test_clean_page_3(self):
+        # nesting
+        s = '--<script>xx<script>yy</script>zz</script>..'
+        r = '--..'
+        self.assertEqual(str(clean_page(s)), r)
+
+    def test_clean_page_4(self):
+        # aria-hidden
+        s = '--<hr aria-hidden="true">..'
+        r = '--..'
+        self.assertEqual(str(clean_page(s)), r)
+        s = "--<hr aria-hidden='TRUE'>.."
+        r = '--..'
+        self.assertEqual(str(clean_page(s)), r)
+        s = '--<p><span aria-hidden="false">xx</span></p>..'
+        r = '--<p><span aria-hidden="false">xx</span></p>..'
+        self.assertEqual(str(clean_page(s)), r)
+        s = '--<p><span aria-hidden="">xx</span></p>..'
+        r = '--<p><span aria-hidden="">xx</span></p>..'
+        self.assertEqual(str(clean_page(s)), r)
+        s = '--<p><span aria-hidden="no">xx</span></p>..'
+        r = '--<p><span aria-hidden="no">xx</span></p>..'
+        self.assertEqual(str(clean_page(s)), r)
+
+    def test_clean_page_5(self):
+        # no removal
+        s = '--<p><span>xxyy</span></p>..'
+        r = '--<p><span>xxyy</span></p>..'
+        self.assertEqual(str(clean_page(s)), r)
+
+    def test_clean_page_6(self):
+        # self-closing tags to be removed
+        s = '--<p><br>xx<embed></p>\n...<p><hr>tt<input></p>nn'
+        r = '--<p><br/>xx</p>\n...<p><hr/>tt</p>nn'
+        self.assertEqual(str(clean_page(s)), r)
+
+    def test_clean_page_7(self):
+        s = '--<embed>nn'
+        r = '--nn'
+        self.assertEqual(str(clean_page(s)), r)