from unittest import TestCase

from atextcrawler.utils.section import concat_section_texts, iter_sections

# Sample text shared by all tests: 26 characters, so offset 26 == len(text).
_ALPHABET = 'abcdefghijklmnopqrstuvwxyz'


class IterSectionTest(TestCase):
    """
    Compare the output of ``iter_sections`` with hand-written expectations.

    Each test passes a mapping from text offset to level and expects a list
    of 4-tuples consisting of two offsets, a level and a text fragment.
    """

    def test_iter_sections_1(self):
        """All four levels are below ``max_level``; four sections expected."""
        boundaries = {0: 80, 5: 2, 15: 1, 20: 3}
        expected = [
            (0, 5, 80, 'bcde'),
            (5, 15, 2, 'ghijklmno'),
            (15, 20, 1, 'qrst'),
            (20, 26, 3, 'uvwxyz'),
        ]
        actual = list(iter_sections(_ALPHABET, boundaries, max_level=100))
        self.assertEqual(actual, expected)

    def test_iter_sections_2(self):
        """
        Same offsets plus an extra entry at offset 26 (the text length).

        No section starting at 26 is expected, and the last fragment is
        'vwxyz' here.
        """
        boundaries = {0: 4, 5: 2, 15: 1, 20: 3, 26: 9}
        expected = [
            (0, 5, 4, 'bcde'),
            (5, 15, 2, 'ghijklmno'),
            (15, 20, 1, 'qrst'),
            (20, 26, 3, 'vwxyz'),
        ]
        actual = list(iter_sections(_ALPHABET, boundaries, max_level=100))
        self.assertEqual(actual, expected)

    def test_iter_sections_3(self):
        """
        The entry at offset 15 has level 60, above ``max_level=59``.

        No section starting at 15 is expected; the section starting at 5
        runs up to 18 instead. A leading section with level 80 is expected
        although the mapping has no entry at offset 0.
        """
        boundaries = {5: 2, 15: 60, 18: 50, 20: 3}
        expected = [
            (0, 5, 80, 'bcde'),
            (5, 18, 2, 'ghijklmnopqr'),
            (18, 20, 50, 't'),
            (20, 26, 3, 'uvwxyz'),
        ]
        actual = list(iter_sections(_ALPHABET, boundaries, max_level=59))
        self.assertEqual(actual, expected)

    def test_iter_sections_4(self):
        """
        Like ``test_iter_sections_3`` with one more level-60 entry at 24.

        The expected sections are the same as without that entry.
        """
        boundaries = {5: 2, 15: 60, 18: 50, 20: 3, 24: 60}
        expected = [
            (0, 5, 80, 'bcde'),
            (5, 18, 2, 'ghijklmnopqr'),
            (18, 20, 50, 't'),
            (20, 26, 3, 'uvwxyz'),
        ]
        actual = list(iter_sections(_ALPHABET, boundaries, max_level=59))
        self.assertEqual(actual, expected)


class AggSectionTest(TestCase):
    """
    Compare the output of ``concat_section_texts`` with fixed expectations.

    Every test uses level 1 for all offsets and ``min_len=10``. The expected
    items are pairs of a list of integers and a text chunk; the integers are
    presumably the positions of the merged sections in offset order (to be
    confirmed against the implementation).
    """

    def test_concat_sections_1(self):
        """Offsets 0, 5, 15, 20: expect chunks of 15 and 11 characters."""
        boundaries = {0: 1, 5: 1, 15: 1, 20: 1}
        expected = [
            ([0, 1], 'abcdefghijklmno'),
            ([2, 3], 'pqrstuvwxyz'),
        ]
        actual = list(concat_section_texts(_ALPHABET, boundaries, min_len=10))
        self.assertEqual(actual, expected)

    def test_concat_sections_2(self):
        """Offsets 0, 2, 10, 20, 26: the first chunk has exactly 10 chars."""
        boundaries = {0: 1, 2: 1, 10: 1, 20: 1, 26: 1}
        expected = [
            ([0, 1], 'abcdefghij'),
            ([2, 3, 4], 'klmnopqrstuvwxyz'),
        ]
        actual = list(concat_section_texts(_ALPHABET, boundaries, min_len=10))
        self.assertEqual(actual, expected)

    def test_concat_sections_3(self):
        """Offsets 0, 4, 6, 16, 26: the first chunk spans three entries."""
        boundaries = {0: 1, 4: 1, 6: 1, 16: 1, 26: 1}
        expected = [
            ([0, 1, 2], 'abcdefghijklmnop'),
            ([3, 4], 'qrstuvwxyz'),
        ]
        actual = list(concat_section_texts(_ALPHABET, boundaries, min_len=10))
        self.assertEqual(actual, expected)

    def test_concat_sections_4(self):
        """Offsets 0, 5, 15, 26: same chunks as in the first test."""
        boundaries = {0: 1, 5: 1, 15: 1, 26: 1}
        expected = [
            ([0, 1], 'abcdefghijklmno'),
            ([2, 3], 'pqrstuvwxyz'),
        ]
        actual = list(concat_section_texts(_ALPHABET, boundaries, min_len=10))
        self.assertEqual(actual, expected)

    def test_concat_sections_5(self):
        """Offsets 0, 5, 12, 22, 23, 24, 26: the last chunk spans five."""
        boundaries = {0: 1, 5: 1, 12: 1, 22: 1, 23: 1, 24: 1, 26: 1}
        expected = [
            ([0, 1], 'abcdefghijkl'),
            ([2, 3, 4, 5, 6], 'mnopqrstuvwxyz'),
        ]
        actual = list(concat_section_texts(_ALPHABET, boundaries, min_len=10))
        self.assertEqual(actual, expected)