atextcrawler/tests/page.py

25 lines
674 B
Python

"""
Test cases for resource type page.
"""
from unittest import TestCase
from atextcrawler.utils.html import clean_body
# from atextcrawler.utils.tag import drop_tags
class PageCleanTest(TestCase):
    """Test cases exercising ``clean_body`` on small HTML snippets."""

    def test_clean_body_1(self):
        # The input carries one leading and one trailing space around
        # inline markup; the expected result is the same markup without
        # those outer spaces (the inner space between tags is kept).
        raw_html = ' <em>Hello</em> <strong>world</strong> '
        expected = '<em>Hello</em> <strong>world</strong>'
        cleaned = clean_body(raw_html)
        self.assertEqual(cleaned, expected)

    # Disabled test for ``drop_tags``; the matching import at the top of
    # this module is commented out as well, so both must be re-enabled
    # together.
    #
    # def test_drop_tags(self):
    #     s = '<figure what="ever">something<figure>else</figure>...</figure>'
    #     r = drop_tags(s)
    #     self.assertEqual(r, '')
    #     s = '<rt><rt><rt><rt>something</rt></rt></rt></rt>'
    #     r = drop_tags(s)
    #     self.assertEqual(r, '')