From 9da2bc67f8adbf576542925cc6d3747f079f360d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mike=20F=C3=A4hrmann?= Date: Wed, 24 Jun 2020 21:13:16 +0200 Subject: [PATCH] [twitter] add option to filter media from quoted tweets (#854) --- docs/configuration.rst | 13 +++++++++++-- docs/gallery-dl.conf | 1 + gallery_dl/extractor/twitter.py | 22 +++++++++++++++------- 3 files changed, 27 insertions(+), 9 deletions(-) diff --git a/docs/configuration.rst b/docs/configuration.rst index 5ddc6dd0..cef477de 100644 --- a/docs/configuration.rst +++ b/docs/configuration.rst @@ -1213,12 +1213,21 @@ Description A (comma-separated) list of post types to extract images, etc. from. =========== ===== +extractor.twitter.quoted +------------------------ +=========== ===== +Type ``bool`` +Default ``true`` +Description Fetch media from quoted Tweets. +=========== ===== + + extractor.twitter.replies ------------------------- =========== ===== Type ``bool`` Default ``true`` -Description Extract media from replies to other Tweets. +Description Fetch media from replies to other Tweets. =========== ===== @@ -1227,7 +1236,7 @@ extractor.twitter.retweets =========== ===== Type ``bool`` Default ``true`` -Description Extract media from Retweets. +Description Fetch media from Retweets. =========== ===== diff --git a/docs/gallery-dl.conf b/docs/gallery-dl.conf index ae4839d5..aa54e1a3 100644 --- a/docs/gallery-dl.conf +++ b/docs/gallery-dl.conf @@ -155,6 +155,7 @@ }, "twitter": { + "quoted": true, "replies": true, "retweets": true, "twitpic": false, diff --git a/gallery_dl/extractor/twitter.py b/gallery_dl/extractor/twitter.py index 20fb2363..1e985e33 100644 --- a/gallery_dl/extractor/twitter.py +++ b/gallery_dl/extractor/twitter.py @@ -31,6 +31,7 @@ class TwitterExtractor(Extractor): self.retweets = self.config("retweets", True) self.replies = self.config("replies", True) self.twitpic = self.config("twitpic", False) + self.quoted = self.config("quoted", True) self.videos = self.config("videos", True) self._user_cache = {} @@ -41,10 +42,9 @@ class TwitterExtractor(Extractor): for tweet in self.tweets(): - if not self.retweets and ( - "retweeted_status_id_str" in tweet or - "quoted_status_id_str" in tweet) or \ - not self.replies and "in_reply_to_user_id_str" in tweet: + if (not self.retweets and "retweeted_status_id_str" in tweet or + not self.replies and "in_reply_to_user_id_str" in tweet or + not self.quoted and "quoted" in tweet): continue if self.twitpic: @@ -340,9 +340,16 @@ class TwitterTweetExtractor(TwitterExtractor): "options": (("replies", False),), "count": 0, }), - # quoted tweet (#526) - ("https://twitter.com/Pistachio/status/1222690391817932803", { - "pattern": r"https://pbs\.twimg\.com/media/EPfMfDUU8AAnByO\.jpg", + # quoted tweet (#526, #854) + ("https://twitter.com/StobiesGalaxy/status/1270755918330896395", { + "pattern": r"https://pbs\.twimg\.com/media/Ea[KG].+\.jpg", + "count": 8, + }), + # "quoted" option (#854) + ("https://twitter.com/StobiesGalaxy/status/1270755918330896395", { + "options": (("quoted", False),), + "pattern": r"https://pbs\.twimg\.com/media/EaK.+\.jpg", + "count": 4, }), # TwitPic embeds (#579) ("https://twitter.com/i/web/status/112900228289540096", { @@ -526,6 +533,7 @@ class TwitterAPI(): if quoted: quoted["author"] = users[quoted["user_id_str"]] quoted["user"] = tweet["user"] + quoted["quoted"] = True yield quoted elif entry["entryId"].startswith(entry_cursor):