implement text.split_html()
This commit is contained in:
parent
53f36176fd
commit
ae9a37a528
@ -43,6 +43,17 @@ def remove_html(txt):
|
||||
return ""
|
||||
|
||||
|
||||
def split_html(txt, sep=None):
    """Return the text fragments of 'txt' that lie between HTML tags.

    Anything matching "<...>" is treated as a tag and used as a
    delimiter.  Fragments that are empty or consist only of whitespace
    are dropped; all other fragments are returned unmodified, including
    any surrounding whitespace.

    'sep' is accepted but not used by this implementation.

    An empty list is returned when 're.split' raises TypeError,
    e.g. because 'txt' is not a string.
    """
    try:
        fragments = re.split("<[^>]+>", txt)
    except TypeError:
        return []

    pieces = []
    for fragment in fragments:
        # skip empty and whitespace-only fragments
        if not fragment or fragment.isspace():
            continue
        pieces.append(fragment)
    return pieces
|
||||
|
||||
|
||||
def filename_from_url(url):
|
||||
"""Extract the last part of an url to use as a filename"""
|
||||
try:
|
||||
|
@ -64,6 +64,30 @@ class TestText(unittest.TestCase):
|
||||
for value in INVALID:
|
||||
self.assertEqual(f(value), "")
|
||||
|
||||
def test_split_html(self, f=text.split_html):
    # Each case pairs an input string with the list of fragments
    # that 'f' is expected to return for it.
    words = ["Hello", "World."]
    nothing = []

    cases = (
        # standard usage
        ("", nothing),
        ("Hello World.", ["Hello World."]),
        (" Hello World. ", [" Hello World. "]),
        ("Hello<br/>World.", words),
        ("<div><b class='a'>Hello</b><i>World.</i></div>", words),

        # empty HTML
        ("<div></div>", nothing),
        (" <div> </div> ", nothing),

        # malformed HTML
        ("<div</div>", nothing),
        ("<div<Hello World.</div>", nothing),
    )
    for markup, expected in cases:
        self.assertEqual(f(markup), expected)

    # invalid arguments
    for bad_value in INVALID:
        self.assertEqual(f(bad_value), nothing)
|
||||
|
||||
def test_filename_from_url(self, f=text.filename_from_url):
|
||||
result = "filename.ext"
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user