"""Tests for ``Durl`` from ``atextcrawler.utils.durl``."""

from unittest import IsolatedAsyncioTestCase

import asyncpg

from atextcrawler.utils.durl import Durl
from atextcrawler.config import Config
from atextcrawler.db import PGPool


class DurlTest(IsolatedAsyncioTestCase):
    """
    Check how ``Durl`` decomposes URLs, alone and relative to a base URL.

    Every test runs with an entered ``PGPool`` (configured from the
    ``postgresql`` section of the application config) and one connection
    acquired from it.
    """

    async def asyncSetUp(self):
        """
        Enter the PostgreSQL pool and acquire one connection.

        Each cleanup is registered immediately after its resource has been
        obtained. Unlike ``asyncTearDown``, registered cleanups also run
        when a later step of the setup fails, so the pool is not leaked if
        acquiring the connection raises.
        """
        config = Config().get()
        self.pool = PGPool(config['postgresql'])
        await self.pool.__aenter__()
        self.addAsyncCleanup(self._close_pool)
        self.conn = await self.pool.pool.acquire()
        # Cleanups run in reverse order of registration: the connection is
        # released first, then the pool is closed.
        self.addAsyncCleanup(self._release_conn)

    async def _close_pool(self):
        """Close the pool wrapped by ``self.pool``."""
        await self.pool.pool.close()

    async def _release_conn(self):
        """Hand the connection acquired during setup back to the pool."""
        await self.pool.pool.release(self.conn)

    async def test_durl_basic(self):
        """
        Decompose absolute URLs without a base.

        Asserts that the host part is lowercased while the credentials keep
        their case, that the fragment is dropped, and that an ``ftp`` URL
        yields ``None``.
        """
        durl1 = await Durl('https://U:Pw@www.EXAMPLE.com:8000/hello?world#a')
        self.assertEqual(durl1.scheme, 'https')
        self.assertEqual(durl1.netloc, 'U:Pw@www.example.com:8000')
        self.assertEqual(durl1.port, 8000)
        self.assertEqual(durl1.path, '/hello')
        self.assertEqual(durl1.fragment, '')
        self.assertEqual(durl1.pwa(), 'hello?world')
        self.assertEqual(durl1.site(), 'https://U:Pw@www.example.com:8000/')
        self.assertEqual(
            durl1.url(), 'https://U:Pw@www.example.com:8000/hello?world'
        )
        self.assertEqual(durl1.has_path(), True)

        durl2 = await Durl('http://www.example.com/')
        self.assertEqual(durl2.has_path(), False)

        durl3 = await Durl('ftp://www.example.com/')
        self.assertIsNone(durl3)

    async def test_durl_with_base(self):
        """
        Resolve absolute and relative URLs against a base ``Durl``.

        Asserts that URLs on the base's host are accepted (absolute or
        relative) and that a URL on another host yields ``None``.
        """
        durl1 = await Durl('https://www.example.com')
        self.assertEqual(durl1.path, '/')
        self.assertEqual(durl1.pwa(), '')
        self.assertEqual(durl1.has_path(), False)

        durl2 = await Durl('https://www.example.com/hello2', base=durl1)
        self.assertEqual(durl2.hostname, 'www.example.com')
        self.assertEqual(durl2.path, '/hello2')
        self.assertEqual(durl2.pwa(), 'hello2')

        durl3 = await Durl('/hello3?x=1', base=durl1)
        self.assertEqual(durl3.hostname, 'www.example.com')
        self.assertEqual(durl3.path, '/hello3')
        self.assertEqual(durl3.pwa(), 'hello3?x=1')
        self.assertEqual(durl3.site(), 'https://www.example.com/')

        durl4 = await Durl('https://www.kernel.org/', base=durl1)
        self.assertIsNone(durl4)

    async def test_durl_with_base_and_match_base(self):
        """
        Resolve URLs against a base that has a path, with ``match_base``.

        Asserts that a URL above the base path yields ``None`` and that,
        for a URL below the base path, ``pwa()`` is given relative to the
        base path.
        """
        durl1 = await Durl('https://www.example.com/base/path/')
        self.assertEqual(durl1.path, '/base/path/')
        self.assertEqual(durl1.pwa(), 'base/path/')
        self.assertEqual(durl1.has_path(), True)

        durl2 = await Durl(
            'https://www.example.com/base/', base=durl1, match_base=True
        )
        self.assertIsNone(durl2)

        durl3 = await Durl(
            'https://www.example.com/base/path/whatever?x=1#a',
            base=durl1,
            match_base=True,
        )
        self.assertEqual(durl3.pwa(), 'whatever?x=1')