atextcrawler/tests/durl.py

69 lines
2.8 KiB
Python

from unittest import IsolatedAsyncioTestCase
import asyncpg
from atextcrawler.utils.durl import Durl
from atextcrawler.config import Config
from atextcrawler.db import PGPool
class DurlTest(IsolatedAsyncioTestCase):
    """
    Async test cases for constructing :class:`Durl` objects.

    Each test runs with a pool created from the 'postgresql' section of
    the project configuration and one connection acquired from it; both
    are given back in :meth:`asyncTearDown`.
    """

    async def asyncSetUp(self):
        """Create the pool from the config and acquire one connection."""
        config = Config().get()
        self.pool = PGPool(config['postgresql'])
        await self.pool.__aenter__()
        try:
            self.conn = await self.pool.pool.acquire()
        except Exception:
            # unittest skips asyncTearDown when asyncSetUp fails, so the
            # pool has to be closed here to avoid leaving it open.
            await self.pool.pool.close()
            raise

    async def test_durl_basic(self):
        """Check the parts of an absolute URL and unsupported schemes."""
        durl1 = await Durl('https://U:Pw@www.EXAMPLE.com:8000/hello?world#a')
        self.assertEqual(durl1.scheme, 'https')
        # Expected: hostname lower-cased, userinfo case left as given.
        self.assertEqual(durl1.netloc, 'U:Pw@www.example.com:8000')
        self.assertEqual(durl1.port, 8000)
        self.assertEqual(durl1.path, '/hello')
        # Expected: the '#a' fragment of the input is not kept.
        self.assertEqual(durl1.fragment, '')
        # Expected: pwa() is path plus query without the leading slash.
        self.assertEqual(durl1.pwa(), 'hello?world')
        self.assertEqual(durl1.site(), 'https://U:Pw@www.example.com:8000/')
        self.assertEqual(
            durl1.url(), 'https://U:Pw@www.example.com:8000/hello?world'
        )
        self.assertEqual(durl1.has_path(), True)

        # A bare '/' path is expected not to count as having a path.
        durl2 = await Durl('http://www.example.com/')
        self.assertEqual(durl2.has_path(), False)

        # An ftp URL is expected to yield None instead of a Durl.
        durl3 = await Durl('ftp://www.example.com/')
        self.assertIsNone(durl3)

    async def test_durl_with_base(self):
        """Check URLs given together with a base Durl."""
        durl1 = await Durl('https://www.example.com')
        # Expected: a missing path is reported as '/'.
        self.assertEqual(durl1.path, '/')
        self.assertEqual(durl1.pwa(), '')
        self.assertEqual(durl1.has_path(), False)

        # Absolute URL on the same site as the base.
        durl2 = await Durl('https://www.example.com/hello2', base=durl1)
        self.assertEqual(durl2.hostname, 'www.example.com')
        self.assertEqual(durl2.path, '/hello2')
        self.assertEqual(durl2.pwa(), 'hello2')

        # Relative URL; scheme and host are expected to come from base.
        durl3 = await Durl('/hello3?x=1', base=durl1)
        self.assertEqual(durl3.hostname, 'www.example.com')
        self.assertEqual(durl3.path, '/hello3')
        self.assertEqual(durl3.pwa(), 'hello3?x=1')
        self.assertEqual(durl3.site(), 'https://www.example.com/')

        # A URL on a different host than the base is expected to give None.
        durl4 = await Durl('https://www.kernel.org/', base=durl1)
        self.assertIsNone(durl4)

    async def test_durl_with_base_and_match_base(self):
        """Check `match_base=True` with a base that has a non-empty path."""
        durl1 = await Durl('https://www.example.com/base/path/')
        self.assertEqual(durl1.path, '/base/path/')
        self.assertEqual(durl1.pwa(), 'base/path/')
        self.assertEqual(durl1.has_path(), True)

        # '/base/' is not below the base path '/base/path/': expect None.
        durl2 = await Durl(
            'https://www.example.com/base/', base=durl1, match_base=True
        )
        self.assertIsNone(durl2)

        # Below the base path: pwa() is expected relative to the base
        # path, without the fragment.
        durl3 = await Durl(
            'https://www.example.com/base/path/whatever?x=1#a',
            base=durl1,
            match_base=True,
        )
        self.assertEqual(durl3.pwa(), 'whatever?x=1')

    async def asyncTearDown(self):
        """Release the connection acquired in setup and close the pool."""
        try:
            await self.pool.pool.release(self.conn)
        finally:
            # Close the pool even if releasing the connection failed.
            await self.pool.pool.close()