atextcrawler/tests/section.py

106 lines
3.5 KiB
Python

from unittest import TestCase
from atextcrawler.utils.section import concat_section_texts, iter_sections
class IterSectionTest(TestCase):
    """
    Compare ``iter_sections`` output with hand-written expectations.

    Every case runs on the lowercase alphabet plus a dict of section
    boundaries (offset -> level, judging by the ``max_level`` keyword).
    The expected 4-tuples presumably read (start, end, level, text);
    confirm against ``iter_sections`` itself.
    """

    def _check(self, boundaries, max_level, expected):
        """Assert that ``iter_sections`` yields exactly *expected*."""
        alphabet = 'abcdefghijklmnopqrstuvwxyz'
        found = iter_sections(alphabet, boundaries, max_level=max_level)
        self.assertEqual(list(found), expected)

    def test_iter_sections_1(self):
        # Boundary at offset 0, no boundary at the end of the text.
        self._check(
            boundaries={0: 80, 5: 2, 15: 1, 20: 3},
            max_level=100,
            expected=[
                (0, 5, 80, 'bcde'),
                (5, 15, 2, 'ghijklmno'),
                (15, 20, 1, 'qrst'),
                (20, 26, 3, 'uvwxyz'),
            ],
        )

    def test_iter_sections_2(self):
        # Extra boundary at offset 26 (the text length): the expectation
        # holds no tuple starting there.
        # NOTE(review): the last text is 'vwxyz' here but 'uvwxyz' in
        # case 1 for the same (20, 26) span -- confirm this is intended.
        self._check(
            boundaries={0: 4, 5: 2, 15: 1, 20: 3, 26: 9},
            max_level=100,
            expected=[
                (0, 5, 4, 'bcde'),
                (5, 15, 2, 'ghijklmno'),
                (15, 20, 1, 'qrst'),
                (20, 26, 3, 'vwxyz'),
            ],
        )

    def test_iter_sections_3(self):
        # No boundary at offset 0: the expectation opens with a level-80
        # tuple.  The level-60 boundary at 15 is above max_level=59 and
        # starts no tuple; the (5, 18) tuple spans across it.
        self._check(
            boundaries={5: 2, 15: 60, 18: 50, 20: 3},
            max_level=59,
            expected=[
                (0, 5, 80, 'bcde'),
                (5, 18, 2, 'ghijklmnopqr'),
                (18, 20, 50, 't'),
                (20, 26, 3, 'uvwxyz'),
            ],
        )

    def test_iter_sections_4(self):
        # Case 3 plus a trailing level-60 boundary at 24; the expected
        # result is the same as in case 3.
        self._check(
            boundaries={5: 2, 15: 60, 18: 50, 20: 3, 24: 60},
            max_level=59,
            expected=[
                (0, 5, 80, 'bcde'),
                (5, 18, 2, 'ghijklmnopqr'),
                (18, 20, 50, 't'),
                (20, 26, 3, 'uvwxyz'),
            ],
        )
class AggSectionTest(TestCase):
    """
    Compare ``concat_section_texts`` output with hand-written expectations.

    Every case runs on the lowercase alphabet with ``min_len=10`` and a
    dict of section boundaries whose values are all 1.  Each expected
    item pairs a list of ints with a string; in these fixtures the ints
    line up with the positions of the dict keys in insertion order
    (presumably section ids -- confirm against ``concat_section_texts``).
    """

    def _check(self, boundaries, expected):
        """Assert that ``concat_section_texts`` yields exactly *expected*."""
        alphabet = 'abcdefghijklmnopqrstuvwxyz'
        found = concat_section_texts(alphabet, boundaries, min_len=10)
        self.assertEqual(list(found), expected)

    def test_concat_sections_1(self):
        # Four boundaries, none at the end of the text.
        self._check(
            boundaries={0: 1, 5: 1, 15: 1, 20: 1},
            expected=[
                ([0, 1], 'abcdefghijklmno'),
                ([2, 3], 'pqrstuvwxyz'),
            ],
        )

    def test_concat_sections_2(self):
        # Boundary at offset 26 (the text length); its index 4 is
        # expected in the final item.
        self._check(
            boundaries={0: 1, 2: 1, 10: 1, 20: 1, 26: 1},
            expected=[
                ([0, 1], 'abcdefghij'),
                ([2, 3, 4], 'klmnopqrstuvwxyz'),
            ],
        )

    def test_concat_sections_3(self):
        # Boundaries at 4 and 6 near the start: the first expected item
        # covers three indexes.
        self._check(
            boundaries={0: 1, 4: 1, 6: 1, 16: 1, 26: 1},
            expected=[
                ([0, 1, 2], 'abcdefghijklmnop'),
                ([3, 4], 'qrstuvwxyz'),
            ],
        )

    def test_concat_sections_4(self):
        # Same expected texts as case 1, but here the fourth boundary
        # sits at offset 26 instead of 20.
        self._check(
            boundaries={0: 1, 5: 1, 15: 1, 26: 1},
            expected=[
                ([0, 1], 'abcdefghijklmno'),
                ([2, 3], 'pqrstuvwxyz'),
            ],
        )

    def test_concat_sections_5(self):
        # Several closely spaced boundaries near the end (22, 23, 24,
        # 26): all of their indexes are expected in the final item.
        self._check(
            boundaries={0: 1, 5: 1, 12: 1, 22: 1, 23: 1, 24: 1, 26: 1},
            expected=[
                ([0, 1], 'abcdefghijkl'),
                ([2, 3, 4, 5, 6], 'mnopqrstuvwxyz'),
            ],
        )