[tumblr] enable date-min/-max/-format options (#337)
This commit is contained in:
parent
09f37fde39
commit
8d1ae9b715
@ -420,6 +420,18 @@ Description Like `image-unique`__, but applies to delegated URLs
|
||||
__ `extractor.*.image-unique`_
|
||||
|
||||
|
||||
extractor.*.date-format
|
||||
----------------------------
|
||||
=========== =====
|
||||
Type ``string``
|
||||
Default ``"%Y-%m-%dT%H:%M:%S"``
|
||||
Description Format string used to parse ``string`` values of
|
||||
``date-min`` and ``date-max``.
|
||||
|
||||
See |strptime|_ for a list of formatting directives.
|
||||
=========== =====
|
||||
|
||||
|
||||
|
||||
Extractor-specific Options
|
||||
==========================
|
||||
@ -776,24 +788,9 @@ Description Retrieve additional comments by resolving the ``more`` comment
|
||||
extractor.reddit.date-min & .date-max
|
||||
-------------------------------------
|
||||
=========== =====
|
||||
Type ``integer`` or ``string``
|
||||
Type |Date|_
|
||||
Default ``0`` and ``253402210800`` (timestamp of |datetime.max|_)
|
||||
Description Ignore all submissions posted before/after this date.
|
||||
|
||||
* If this is an ``integer``, it represents the date as UTC timestamp.
|
||||
* If this is a ``string``, it will get parsed according to date-format_.
|
||||
=========== =====
|
||||
|
||||
|
||||
extractor.reddit.date-format
|
||||
----------------------------
|
||||
=========== =====
|
||||
Type ``string``
|
||||
Default ``"%Y-%m-%dT%H:%M:%S"``
|
||||
Description An explicit format string used to parse the ``string`` values of
|
||||
`date-min and date-max`_.
|
||||
|
||||
See |strptime|_ for a list of formatting directives.
|
||||
=========== =====
|
||||
|
||||
|
||||
@ -870,6 +867,15 @@ Description Download blog avatars.
|
||||
=========== =====
|
||||
|
||||
|
||||
extractor.tumblr.date-min & .date-max
|
||||
-------------------------------------
|
||||
=========== =====
|
||||
Type |Date|_
|
||||
Default ``0`` and ``null``
|
||||
Description Ignore all posts published before/after this date.
|
||||
=========== =====
|
||||
|
||||
|
||||
extractor.tumblr.external
|
||||
-------------------------
|
||||
=========== =====
|
||||
@ -1546,6 +1552,20 @@ Custom Types
|
||||
============
|
||||
|
||||
|
||||
Date
|
||||
----
|
||||
=========== =====
|
||||
Type ``string`` or ``integer``
|
||||
Examples * ``"2019-01-01T00:00:00"``
|
||||
* ``"2019"`` with ``"%Y"`` as date-format_
|
||||
* ``1546297200``
|
||||
Description A |Date|_ value represents a specific point in time.
|
||||
|
||||
* If given as ``string``, it is parsed according to date-format_.
|
||||
* If given as ``integer``, it is interpreted as a UTC timestamp.
|
||||
=========== =====
|
||||
|
||||
|
||||
Path
|
||||
----
|
||||
=========== =====
|
||||
@ -1667,6 +1687,7 @@ Description An object with the ``name`` of a post-processor and its options.
|
||||
.. |webbrowser.open()| replace:: ``webbrowser.open()``
|
||||
.. |datetime| replace:: ``datetime``
|
||||
.. |datetime.max| replace:: ``datetime.max``
|
||||
.. |Date| replace:: ``Date``
|
||||
.. |Path| replace:: ``Path``
|
||||
.. |Last-Modified| replace:: ``Last-Modified``
|
||||
.. |Logging Configuration| replace:: ``Logging Configuration``
|
||||
@ -1675,8 +1696,7 @@ Description An object with the ``name`` of a post-processor and its options.
|
||||
|
||||
.. _base-directory: `extractor.*.base-directory`_
|
||||
.. _skipped: `extractor.*.skip`_
|
||||
.. _`date-min and date-max`: `extractor.reddit.date-min & .date-max`_
|
||||
.. _date-format: extractor.reddit.date-format_
|
||||
.. _date-format: `extractor.*.date-format`_
|
||||
.. _deviantart.metadata: extractor.deviantart.metadata_
|
||||
|
||||
.. _.netrc: https://stackoverflow.com/tags/.netrc/info
|
||||
|
@ -65,11 +65,15 @@ class TumblrExtractor(Extractor):
|
||||
if self.reblogs == "same-blog":
|
||||
self._skip_reblog = self._skip_reblog_same_blog
|
||||
|
||||
self.date_min, self.api.before = self._get_date_min_max(0, None)
|
||||
|
||||
def items(self):
|
||||
blog = None
|
||||
yield Message.Version, 1
|
||||
|
||||
for post in self.posts():
|
||||
if self.date_min > post["timestamp"]:
|
||||
return
|
||||
if post["type"] not in self.types:
|
||||
continue
|
||||
if not blog:
|
||||
@ -223,6 +227,11 @@ class TumblrUserExtractor(TumblrExtractor):
|
||||
"count": 2,
|
||||
"keyword": {"tags": ["test", "private", "hidden"]},
|
||||
}),
|
||||
("https://mikf123.tumblr.com/", { # date-min/-max/-format (#337)
|
||||
"count": 4,
|
||||
"options": (("date-min", "201804"), ("date-max", "201805"),
|
||||
("date-format", "%Y%m"))
|
||||
}),
|
||||
("https://demo.tumblr.com/page/2"),
|
||||
("https://demo.tumblr.com/archive"),
|
||||
("tumblr:http://www.b-authentique.com/"),
|
||||
@ -280,6 +289,7 @@ class TumblrPostExtractor(TumblrExtractor):
|
||||
TumblrExtractor.__init__(self, match)
|
||||
self.post_id = match.group(3)
|
||||
self.reblogs = True
|
||||
self.date_min = 0
|
||||
|
||||
def posts(self):
|
||||
return self.api.posts(self.blog, {"id": self.post_id})
|
||||
@ -328,7 +338,7 @@ class TumblrAPI(oauth.OAuth1API):
|
||||
|
||||
def __init__(self, extractor):
|
||||
oauth.OAuth1API.__init__(self, extractor)
|
||||
self.posts_type = None
|
||||
self.posts_type = self.before = None
|
||||
|
||||
def info(self, blog):
|
||||
"""Return general information about a blog"""
|
||||
@ -350,6 +360,8 @@ class TumblrAPI(oauth.OAuth1API):
|
||||
params.update({"offset": 0, "limit": 50, "reblog_info": "true"})
|
||||
if self.posts_type:
|
||||
params["type"] = self.posts_type
|
||||
if self.before:
|
||||
params["before"] = self.before
|
||||
while True:
|
||||
data = self._call(blog, "posts", params)
|
||||
self.BLOG_CACHE[blog] = data["blog"]
|
||||
@ -360,7 +372,7 @@ class TumblrAPI(oauth.OAuth1API):
|
||||
|
||||
def likes(self, blog):
|
||||
"""Retrieve liked posts"""
|
||||
params = {"limit": 50}
|
||||
params = {"limit": "50", "before": self.before}
|
||||
while True:
|
||||
posts = self._call(blog, "likes", params)["liked_posts"]
|
||||
if not posts:
|
||||
|
Loading…
x
Reference in New Issue
Block a user