forked from attardi/wikiextractor
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtests.py
100 lines (74 loc) · 3.24 KB
/
tests.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
# coding: utf-8
# This test must pass both in Python 2 and 3.
from __future__ import unicode_literals
import sys
import os.path
import unittest
from WikiExtractor import (
normalizeTitle, unescape, ucfirst, lcfirst, splitParts,
fullyQualifiedTemplateTitle, NextFile
)
class TestNormalizeTitle(unittest.TestCase):
    """Expectations for ``normalizeTitle`` on prefixed and plain titles."""

    def _check(self, cases):
        """Assert ``normalizeTitle(raw) == expected`` for each pair in *cases*."""
        for raw, expected in cases:
            self.assertEqual(normalizeTitle(raw), expected)

    def test_known_namespace(self):
        """``Template:`` titles are expected back as ``Template:Births``."""
        self._check([
            ("Template: Births", "Template:Births"),
            (" template: births_", "Template:Births"),
        ])

    def test_not_known_namespace(self):
        """``Category:`` titles are expected to keep the blank after the colon."""
        self._check([
            ("Category: Births", "Category: Births"),
            ("_category: births___", "Category: Births"),
        ])

    def test_no_namespace(self):
        """Plain titles: first letter upper-cased, underscores read as one blank."""
        self._check([
            ("python", "Python"),
            ("python 3", "Python 3"),
            ("python__3", "Python 3"),
        ])
class TestStringUtils(unittest.TestCase):
    """Expectations for the helpers ``unescape``, ``ucfirst`` and ``lcfirst``."""

    def test_unescape(self):
        """Character references must be decoded to the character they denote.

        The inputs are written as numeric character references so that each
        assertion compares an encoded input with its decoded form; passing
        the literal character would only compare a string with itself.
        """
        # NOTE(review): assumes unescape accepts decimal (&#NN;) and
        # hexadecimal (&#xHH;) references -- confirm against WikiExtractor.
        self.assertEqual(unescape('&#34;'), '"')
        self.assertEqual(unescape('&#38;'), '&')
        self.assertEqual(unescape('&#12354;'), '\u3042')
        if sys.maxunicode > 0xFFFF:
            # Python 3 or UCS-4 build of Python 2: code points above U+FFFF
            # are expected to be decoded.
            self.assertEqual(unescape('&#x1D546;'), '\U0001D546')
            self.assertEqual(unescape('&#x1d4c1;'), '\U0001d4c1')
        else:
            # UCS-2 build of Python 2: the reference is expected to come
            # back unchanged.
            self.assertEqual(unescape('&#x1D546;'), '&#x1D546;')
            self.assertEqual(unescape('&#x1d4c1;'), '&#x1d4c1;')

    def test_ucfirst(self):
        """``ucfirst`` is expected to turn 'python' into 'Python'."""
        self.assertEqual(ucfirst('python'), 'Python')

    def test_lcfirst(self):
        """``lcfirst`` is expected to turn 'Python' into 'python'."""
        self.assertEqual(lcfirst('Python'), 'python')
class TestSplitParts(unittest.TestCase):
    """Expectations for ``splitParts`` on flat and nested template text."""

    def test_simple(self):
        """A flat ``|``-separated string is expected to give one item per segment."""
        expected = ['p=q', 'q=r', 'r=s']
        self.assertEqual(splitParts("p=q|q=r|r=s"), expected)

    def test_complex(self):
        """Text containing ``{{...}}`` and ``[[...]]`` constructs."""
        # Three parts are expected, each keeping the blanks around its bars.
        source = '{{#if: {{{1}}} | {{lc:{{{1}}} | "parameter missing"}}'
        pieces = ['{{#if: {{{1}}} ', ' {{lc:{{{1}}} ', ' "parameter missing"}}']
        self.assertEqual(splitParts(source), pieces)

        # This multi-line template is expected back as one single part.
        # The continuation lines start at column 0 because they belong to
        # the string value.
        nested = '''{{if:|
|{{#if:the president|
|{{#if:|
[[Category:Hatnote templates|A{{PAGENAME}}]]
}}
}}
}}'''
        self.assertEqual(splitParts(nested), [nested])
class TestFullyQualifiedTemplateTitle(unittest.TestCase):
    """Expectations for ``fullyQualifiedTemplateTitle``."""

    def test_main_namespace(self):
        """A leading colon is dropped and the result is 'Python' for both spellings."""
        for raw in (':Python', ':python'):
            self.assertEqual(fullyQualifiedTemplateTitle(raw), 'Python')

    def test_other_namespace(self):
        """'User:Orange' is expected to come back unchanged."""
        title = 'User:Orange'
        self.assertEqual(fullyQualifiedTemplateTitle(title), title)
class TestNextFile(unittest.TestCase):
    """Expectations for the path sequence produced by ``NextFile``."""

    def test_next(self):
        """Successive ``next()`` calls go from AA/wiki_00 up to AB/wiki_00."""
        paths = NextFile('out')
        sep = os.path.sep

        # The expected paths join 'out' with os.path.sep but use a literal
        # '/' before the file name.
        self.assertEqual(next(paths), 'out{}AA/wiki_00'.format(sep))
        self.assertEqual(next(paths), 'out{}AA/wiki_01'.format(sep))

        # Discard the next 97 results; only the two calls after them are
        # checked.
        for _unused in range(97):
            next(paths)

        self.assertEqual(next(paths), 'out{}AA/wiki_99'.format(sep))
        self.assertEqual(next(paths), 'out{}AB/wiki_00'.format(sep))
# Run the test cases of this module when the file is executed as a script.
if __name__ == "__main__":
    unittest.main()