[twitter] strip useless t.co links (#1532)

The 'full_text' of Tweets with media content usually ends with a t.co
link to itself. This commit removes those.
This commit is contained in:
Mike Fährmann 2021-05-16 02:35:55 +02:00
parent 3a7c3ff138
commit 394fbb5f56
No known key found for this signature in database
GPG Key ID: 5680CA389D365A88

View File

@@ -191,7 +191,8 @@ class TwitterExtractor(Extractor):
if urls:
for url in urls:
content = content.replace(url["url"], url["expanded_url"])
-tdata["content"] = content
+txt, _, tco = content.rpartition(" ")
+tdata["content"] = txt if tco.startswith("https://t.co/") else content
if "in_reply_to_screen_name" in tweet:
tdata["reply_to"] = tweet["in_reply_to_screen_name"]