[tumblr] enable date-min/-max/-format options (#337)
This commit is contained in:
parent
09f37fde39
commit
8d1ae9b715
@ -420,6 +420,18 @@ Description Like `image-unique`__, but applies to delegated URLs
|
|||||||
__ `extractor.*.image-unique`_
|
__ `extractor.*.image-unique`_
|
||||||
|
|
||||||
|
|
||||||
|
extractor.*.date-format
|
||||||
|
----------------------------
|
||||||
|
=========== =====
|
||||||
|
Type ``string``
|
||||||
|
Default ``"%Y-%m-%dT%H:%M:%S"``
|
||||||
|
Description Format string used to parse ``string`` values of
|
||||||
|
`date-min` and `date-max`.
|
||||||
|
|
||||||
|
See |strptime|_ for a list of formatting directives.
|
||||||
|
=========== =====
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
Extractor-specific Options
|
Extractor-specific Options
|
||||||
==========================
|
==========================
|
||||||
@ -776,24 +788,9 @@ Description Retrieve additional comments by resolving the ``more`` comment
|
|||||||
extractor.reddit.date-min & .date-max
|
extractor.reddit.date-min & .date-max
|
||||||
-------------------------------------
|
-------------------------------------
|
||||||
=========== =====
|
=========== =====
|
||||||
Type ``integer`` or ``string``
|
Type |Date|_
|
||||||
Default ``0`` and ``253402210800`` (timestamp of |datetime.max|_)
|
Default ``0`` and ``253402210800`` (timestamp of |datetime.max|_)
|
||||||
Description Ignore all submissions posted before/after this date.
|
Description Ignore all submissions posted before/after this date.
|
||||||
|
|
||||||
* If this is an ``integer``, it represents the date as UTC timestamp.
|
|
||||||
* If this is a ``string``, it will get parsed according to date-format_.
|
|
||||||
=========== =====
|
|
||||||
|
|
||||||
|
|
||||||
extractor.reddit.date-format
|
|
||||||
----------------------------
|
|
||||||
=========== =====
|
|
||||||
Type ``string``
|
|
||||||
Default ``"%Y-%m-%dT%H:%M:%S"``
|
|
||||||
Description An explicit format string used to parse the ``string`` values of
|
|
||||||
`date-min and date-max`_.
|
|
||||||
|
|
||||||
See |strptime|_ for a list of formatting directives.
|
|
||||||
=========== =====
|
=========== =====
|
||||||
|
|
||||||
|
|
||||||
@ -870,6 +867,15 @@ Description Download blog avatars.
|
|||||||
=========== =====
|
=========== =====
|
||||||
|
|
||||||
|
|
||||||
|
extractor.tumblr.date-min & .date-max
|
||||||
|
-------------------------------------
|
||||||
|
=========== =====
|
||||||
|
Type |Date|_
|
||||||
|
Default ``0`` and ``null``
|
||||||
|
Description Ignore all posts published before/after this date.
|
||||||
|
=========== =====
|
||||||
|
|
||||||
|
|
||||||
extractor.tumblr.external
|
extractor.tumblr.external
|
||||||
-------------------------
|
-------------------------
|
||||||
=========== =====
|
=========== =====
|
||||||
@ -1546,6 +1552,20 @@ Custom Types
|
|||||||
============
|
============
|
||||||
|
|
||||||
|
|
||||||
|
Date
|
||||||
|
----
|
||||||
|
=========== =====
|
||||||
|
Type ``string`` or ``integer``
|
||||||
|
Examples * ``"2019-01-01T00:00:00"``
|
||||||
|
* ``"2019"`` with ``"%Y"`` as date-format_
|
||||||
|
* ``1546297200``
|
||||||
|
Description A |Date|_ value represents a specific point in time.
|
||||||
|
|
||||||
|
* If given as ``string``, it is parsed according to date-format_.
|
||||||
|
* If given as ``integer``, it is interpreted as a UTC timestamp.
|
||||||
|
=========== =====
|
||||||
|
|
||||||
|
|
||||||
Path
|
Path
|
||||||
----
|
----
|
||||||
=========== =====
|
=========== =====
|
||||||
@ -1667,6 +1687,7 @@ Description An object with the ``name`` of a post-processor and its options.
|
|||||||
.. |webbrowser.open()| replace:: ``webbrowser.open()``
|
.. |webbrowser.open()| replace:: ``webbrowser.open()``
|
||||||
.. |datetime| replace:: ``datetime``
|
.. |datetime| replace:: ``datetime``
|
||||||
.. |datetime.max| replace:: ``datetime.max``
|
.. |datetime.max| replace:: ``datetime.max``
|
||||||
|
.. |Date| replace:: ``Date``
|
||||||
.. |Path| replace:: ``Path``
|
.. |Path| replace:: ``Path``
|
||||||
.. |Last-Modified| replace:: ``Last-Modified``
|
.. |Last-Modified| replace:: ``Last-Modified``
|
||||||
.. |Logging Configuration| replace:: ``Logging Configuration``
|
.. |Logging Configuration| replace:: ``Logging Configuration``
|
||||||
@ -1675,8 +1696,7 @@ Description An object with the ``name`` of a post-processor and its options.
|
|||||||
|
|
||||||
.. _base-directory: `extractor.*.base-directory`_
|
.. _base-directory: `extractor.*.base-directory`_
|
||||||
.. _skipped: `extractor.*.skip`_
|
.. _skipped: `extractor.*.skip`_
|
||||||
.. _`date-min and date-max`: `extractor.reddit.date-min & .date-max`_
|
.. _date-format: `extractor.*.date-format`_
|
||||||
.. _date-format: extractor.reddit.date-format_
|
|
||||||
.. _deviantart.metadata: extractor.deviantart.metadata_
|
.. _deviantart.metadata: extractor.deviantart.metadata_
|
||||||
|
|
||||||
.. _.netrc: https://stackoverflow.com/tags/.netrc/info
|
.. _.netrc: https://stackoverflow.com/tags/.netrc/info
|
||||||
|
@ -65,11 +65,15 @@ class TumblrExtractor(Extractor):
|
|||||||
if self.reblogs == "same-blog":
|
if self.reblogs == "same-blog":
|
||||||
self._skip_reblog = self._skip_reblog_same_blog
|
self._skip_reblog = self._skip_reblog_same_blog
|
||||||
|
|
||||||
|
self.date_min, self.api.before = self._get_date_min_max(0, None)
|
||||||
|
|
||||||
def items(self):
|
def items(self):
|
||||||
blog = None
|
blog = None
|
||||||
yield Message.Version, 1
|
yield Message.Version, 1
|
||||||
|
|
||||||
for post in self.posts():
|
for post in self.posts():
|
||||||
|
if self.date_min > post["timestamp"]:
|
||||||
|
return
|
||||||
if post["type"] not in self.types:
|
if post["type"] not in self.types:
|
||||||
continue
|
continue
|
||||||
if not blog:
|
if not blog:
|
||||||
@ -223,6 +227,11 @@ class TumblrUserExtractor(TumblrExtractor):
|
|||||||
"count": 2,
|
"count": 2,
|
||||||
"keyword": {"tags": ["test", "private", "hidden"]},
|
"keyword": {"tags": ["test", "private", "hidden"]},
|
||||||
}),
|
}),
|
||||||
|
("https://mikf123.tumblr.com/", { # date-min/-max/-format (#337)
|
||||||
|
"count": 4,
|
||||||
|
"options": (("date-min", "201804"), ("date-max", "201805"),
|
||||||
|
("date-format", "%Y%m"))
|
||||||
|
}),
|
||||||
("https://demo.tumblr.com/page/2"),
|
("https://demo.tumblr.com/page/2"),
|
||||||
("https://demo.tumblr.com/archive"),
|
("https://demo.tumblr.com/archive"),
|
||||||
("tumblr:http://www.b-authentique.com/"),
|
("tumblr:http://www.b-authentique.com/"),
|
||||||
@ -280,6 +289,7 @@ class TumblrPostExtractor(TumblrExtractor):
|
|||||||
TumblrExtractor.__init__(self, match)
|
TumblrExtractor.__init__(self, match)
|
||||||
self.post_id = match.group(3)
|
self.post_id = match.group(3)
|
||||||
self.reblogs = True
|
self.reblogs = True
|
||||||
|
self.date_min = 0
|
||||||
|
|
||||||
def posts(self):
|
def posts(self):
|
||||||
return self.api.posts(self.blog, {"id": self.post_id})
|
return self.api.posts(self.blog, {"id": self.post_id})
|
||||||
@ -328,7 +338,7 @@ class TumblrAPI(oauth.OAuth1API):
|
|||||||
|
|
||||||
def __init__(self, extractor):
|
def __init__(self, extractor):
|
||||||
oauth.OAuth1API.__init__(self, extractor)
|
oauth.OAuth1API.__init__(self, extractor)
|
||||||
self.posts_type = None
|
self.posts_type = self.before = None
|
||||||
|
|
||||||
def info(self, blog):
|
def info(self, blog):
|
||||||
"""Return general information about a blog"""
|
"""Return general information about a blog"""
|
||||||
@ -350,6 +360,8 @@ class TumblrAPI(oauth.OAuth1API):
|
|||||||
params.update({"offset": 0, "limit": 50, "reblog_info": "true"})
|
params.update({"offset": 0, "limit": 50, "reblog_info": "true"})
|
||||||
if self.posts_type:
|
if self.posts_type:
|
||||||
params["type"] = self.posts_type
|
params["type"] = self.posts_type
|
||||||
|
if self.before:
|
||||||
|
params["before"] = self.before
|
||||||
while True:
|
while True:
|
||||||
data = self._call(blog, "posts", params)
|
data = self._call(blog, "posts", params)
|
||||||
self.BLOG_CACHE[blog] = data["blog"]
|
self.BLOG_CACHE[blog] = data["blog"]
|
||||||
@ -360,7 +372,7 @@ class TumblrAPI(oauth.OAuth1API):
|
|||||||
|
|
||||||
def likes(self, blog):
|
def likes(self, blog):
|
||||||
"""Retrieve liked posts"""
|
"""Retrieve liked posts"""
|
||||||
params = {"limit": 50}
|
params = {"limit": "50", "before": self.before}
|
||||||
while True:
|
while True:
|
||||||
posts = self._call(blog, "likes", params)["liked_posts"]
|
posts = self._call(blog, "likes", params)["liked_posts"]
|
||||||
if not posts:
|
if not posts:
|
||||||
|
Loading…
x
Reference in New Issue
Block a user