implement 'util.unique_sequence()'

This commit is contained in:
Mike Fährmann 2021-03-02 23:01:38 +01:00
parent bae874f370
commit bff71cde80
No known key found for this signature in database
GPG Key ID: 5680CA389D365A88
3 changed files with 25 additions and 9 deletions

View File

@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
# Copyright 2019-2020 Mike Fährmann
# Copyright 2019-2021 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@ -125,17 +125,14 @@ class SankakucomplexTagExtractor(SankakucomplexExtractor):
# Generator that walks the numbered pages under self.root/self.path and
# queues every 'data-direct' value found in each page's text.
# NOTE(review): this listing is a diff view with indentation stripped; the
# two 'for url in ...' loops below look like the pre-change and post-change
# versions of the same loop shown together, not two loops that both run --
# confirm against the real file.
def items(self):
pnum = 1
last = None
# Payload attached to every queued URL.
data = {"_extractor": SankakucomplexArticleExtractor}
yield Message.Version, 1
while True:
# Page URL is built from self.root, self.path and the page counter.
url = "{}/{}/page/{}/".format(self.root, self.path, pnum)
response = self.request(url, fatal=False)
# Stop at the first response with an HTTP status of 400 or above.
if response.status_code >= 400:
return
# Skip a URL that equals the one seen immediately before it.
for url in text.extract_iter(response.text, 'data-direct="', '"'):
if url != last:
last = url
yield Message.Queue, url, data
# Consecutive-duplicate filtering via util.unique_sequence().
for url in util.unique_sequence(text.extract_iter(
response.text, 'data-direct="', '"')):
yield Message.Queue, url, data
pnum += 1

View File

@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
# Copyright 2017-2020 Mike Fährmann
# Copyright 2017-2021 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@ -65,6 +65,15 @@ def unique(iterable):
yield element
def unique_sequence(iterable):
    """Yield sequentially unique elements from 'iterable'

    Each run of consecutive equal elements is collapsed into its first
    element; equal elements that are not adjacent are all kept.
    Elements are compared with '!=', so they do not need to be hashable.
    """
    # Start from a private sentinel instead of None, so that a leading
    # None in 'iterable' is yielded rather than silently dropped.
    last = object()
    for element in iterable:
        if element != last:
            last = element
            yield element
def raises(cls):
"""Returns a function that raises 'cls' as exception"""
def wrap(*args):

View File

@ -1,7 +1,7 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# Copyright 2015-2020 Mike Fährmann
# Copyright 2015-2021 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@ -461,6 +461,16 @@ class TestOther(unittest.TestCase):
self.assertSequenceEqual(
list(util.unique([1, 2, 1, 3, 2, 1])), [1, 2, 3])
def test_unique_sequence(self):
    # (input, expected) pairs: only adjacent duplicates are collapsed,
    # repeated elements that are not adjacent are all kept.
    cases = (
        ("", ""),
        ("AABBCC", "ABC"),
        ("ABABABCAABBCC", "ABABABCABC"),
        ([1, 2, 1, 3, 2, 1], [1, 2, 1, 3, 2, 1]),
    )
    for given, expected in cases:
        result = list(util.unique_sequence(given))
        self.assertSequenceEqual(result, expected)
def test_raises(self):
func = util.raises(Exception)
with self.assertRaises(Exception):