[mastodon] update
- inherit from BaseExtractor - remove custom generate_extractors() and config() - improve layout of MastodonAPI internals
This commit is contained in:
parent
231bcad614
commit
fa33f13453
@ -1,6 +1,6 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
# Copyright 2019-2020 Mike Fährmann
|
||||
# Copyright 2019-2021 Mike Fährmann
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License version 2 as
|
||||
@ -8,35 +8,25 @@
|
||||
|
||||
"""Extractors for mastodon instances"""
|
||||
|
||||
from .common import Extractor, Message
|
||||
from .. import text, util, config, exception
|
||||
import re
|
||||
from .common import BaseExtractor, Message
|
||||
from .. import text, exception
|
||||
|
||||
|
||||
class MastodonExtractor(Extractor):
|
||||
class MastodonExtractor(BaseExtractor):
|
||||
"""Base class for mastodon extractors"""
|
||||
basecategory = "mastodon"
|
||||
directory_fmt = ("mastodon", "{instance}", "{account[username]}")
|
||||
filename_fmt = "{category}_{id}_{media[id]}.{extension}"
|
||||
archive_fmt = "{media[id]}"
|
||||
cookiedomain = None
|
||||
instance = None
|
||||
root = None
|
||||
|
||||
def __init__(self, match):
|
||||
Extractor.__init__(self, match)
|
||||
BaseExtractor.__init__(self, match)
|
||||
self.instance = self.root.partition("://")[2]
|
||||
self.item = match.group(match.lastindex)
|
||||
self.api = MastodonAPI(self)
|
||||
|
||||
def config(self, key, default=None):
|
||||
return config.interpolate_common(
|
||||
("extractor",), (
|
||||
(self.category, self.subcategory),
|
||||
(self.basecategory, self.instance, self.subcategory),
|
||||
), key, default,
|
||||
)
|
||||
|
||||
def items(self):
|
||||
yield Message.Version, 1
|
||||
for status in self.statuses():
|
||||
attachments = status["media_attachments"]
|
||||
if attachments:
|
||||
@ -60,18 +50,56 @@ class MastodonExtractor(Extractor):
|
||||
status["created_at"][:19], "%Y-%m-%dT%H:%M:%S")
|
||||
|
||||
|
||||
INSTANCES = {
|
||||
"mastodon.social": {
|
||||
"root" : "https://mastodon.social",
|
||||
"access-token" : "Y06R36SMvuXXN5_wiPKFAEFiQaMSQg0o_hGgc86Jj48",
|
||||
"client-id" : "dBSHdpsnOUZgxOnjKSQrWEPakO3ctM7HmsyoOd4FcRo",
|
||||
"client-secret": "DdrODTHs_XoeOsNVXnILTMabtdpWrWOAtrmw91wU1zI",
|
||||
},
|
||||
"pawoo": {
|
||||
"root" : "https://pawoo.net",
|
||||
"access-token" : "c12c9d275050bce0dc92169a28db09d7"
|
||||
"0d62d0a75a8525953098c167eacd3668",
|
||||
"client-id" : "978a25f843ec01e53d09be2c290cd75c"
|
||||
"782bc3b7fdbd7ea4164b9f3c3780c8ff",
|
||||
"client-secret": "9208e3d4a7997032cf4f1b0e12e5df38"
|
||||
"8428ef1fadb446dcfeb4f5ed6872d97b",
|
||||
},
|
||||
"baraag": {
|
||||
"root" : "https://baraag.net",
|
||||
"access-token" : "53P1Mdigf4EJMH-RmeFOOSM9gdSDztmrAYFgabOKKE0",
|
||||
"client-id" : "czxx2qilLElYHQ_sm-lO8yXuGwOHxLX9RYYaD0-nq1o",
|
||||
"client-secret": "haMaFdMBgK_-BIxufakmI2gFgkYjqmgXGEO2tB-R2xY",
|
||||
}
|
||||
}
|
||||
|
||||
BASE_PATTERN = MastodonExtractor.update(INSTANCES)
|
||||
|
||||
|
||||
class MastodonUserExtractor(MastodonExtractor):
|
||||
"""Extractor for all images of an account/user"""
|
||||
subcategory = "user"
|
||||
|
||||
def __init__(self, match):
|
||||
MastodonExtractor.__init__(self, match)
|
||||
self.account_name = match.group(1)
|
||||
pattern = BASE_PATTERN + r"/@([^/?#]+)(?:/media)?/?$"
|
||||
test = (
|
||||
("https://mastodon.social/@jk", {
|
||||
"pattern": r"https://files.mastodon.social/media_attachments"
|
||||
r"/files/(\d+/){3,}original/\w+",
|
||||
"range": "1-60",
|
||||
"count": 60,
|
||||
}),
|
||||
("https://pawoo.net/@yoru_nine/", {
|
||||
"range": "1-60",
|
||||
"count": 60,
|
||||
}),
|
||||
("https://baraag.net/@pumpkinnsfw"),
|
||||
)
|
||||
|
||||
def statuses(self):
|
||||
handle = "@{}@{}".format(self.account_name, self.instance)
|
||||
username = self.item
|
||||
handle = "@{}@{}".format(username, self.instance)
|
||||
for account in self.api.account_search(handle, 1):
|
||||
if account["username"] == self.account_name:
|
||||
if account["username"] == username:
|
||||
break
|
||||
else:
|
||||
raise exception.NotFoundError("account")
|
||||
@ -81,13 +109,21 @@ class MastodonUserExtractor(MastodonExtractor):
|
||||
class MastodonStatusExtractor(MastodonExtractor):
|
||||
"""Extractor for images from a status"""
|
||||
subcategory = "status"
|
||||
|
||||
def __init__(self, match):
|
||||
MastodonExtractor.__init__(self, match)
|
||||
self.status_id = match.group(1)
|
||||
pattern = BASE_PATTERN + r"/@[^/?#]+/(\d+)"
|
||||
test = (
|
||||
("https://mastodon.social/@jk/103794036899778366", {
|
||||
"count": 4,
|
||||
}),
|
||||
("https://pawoo.net/@yoru_nine/105038878897832922", {
|
||||
"content": "b52e807f8ab548d6f896b09218ece01eba83987a",
|
||||
}),
|
||||
("https://baraag.net/@pumpkinnsfw/104364170556898443", {
|
||||
"content": "67748c1b828c58ad60d0fe5729b59fb29c872244",
|
||||
}),
|
||||
)
|
||||
|
||||
def statuses(self):
|
||||
return (self.api.status(self.status_id),)
|
||||
return (self.api.status(self.item),)
|
||||
|
||||
|
||||
class MastodonAPI():
|
||||
@ -102,30 +138,36 @@ class MastodonAPI():
|
||||
self.extractor = extractor
|
||||
|
||||
if not access_token:
|
||||
access_token = extractor.config(
|
||||
"access-token", extractor.access_token)
|
||||
self.headers = {"Authorization": "Bearer {}".format(access_token)}
|
||||
access_token = extractor.config("access-token")
|
||||
if not access_token:
|
||||
if extractor.category not in INSTANCES:
|
||||
raise exception.StopExtraction("missing access token")
|
||||
access_token = INSTANCES[extractor.category]["access-token"]
|
||||
|
||||
self.headers = {"Authorization": "Bearer " + access_token}
|
||||
|
||||
def account_search(self, query, limit=40):
|
||||
"""Search for content"""
|
||||
endpoint = "/v1/accounts/search"
|
||||
params = {"q": query, "limit": limit}
|
||||
return self._call("accounts/search", params).json()
|
||||
return self._call(endpoint, params).json()
|
||||
|
||||
def account_statuses(self, account_id):
|
||||
"""Get an account's statuses"""
|
||||
endpoint = "accounts/{}/statuses".format(account_id)
|
||||
endpoint = "/v1/accounts/{}/statuses".format(account_id)
|
||||
params = {"only_media": "1"}
|
||||
return self._pagination(endpoint, params)
|
||||
|
||||
def status(self, status_id):
|
||||
"""Fetch a Status"""
|
||||
return self._call("statuses/" + status_id).json()
|
||||
"""Fetch a status"""
|
||||
endpoint = "/v1/statuses/" + status_id
|
||||
return self._call(endpoint).json()
|
||||
|
||||
def _call(self, endpoint, params=None):
|
||||
if endpoint.startswith("http"):
|
||||
url = endpoint
|
||||
else:
|
||||
url = "{}/api/v1/{}".format(self.root, endpoint)
|
||||
url = self.root + "/api" + endpoint
|
||||
|
||||
while True:
|
||||
response = self.extractor.request(
|
||||
@ -145,7 +187,7 @@ class MastodonAPI():
|
||||
raise exception.StopExtraction(response.json().get("error"))
|
||||
|
||||
def _pagination(self, endpoint, params):
|
||||
url = "{}/api/v1/{}".format(self.root, endpoint)
|
||||
url = endpoint
|
||||
while url:
|
||||
response = self._call(url, params)
|
||||
yield from response.json()
|
||||
@ -154,88 +196,3 @@ class MastodonAPI():
|
||||
if not url:
|
||||
return
|
||||
url = url["url"]
|
||||
|
||||
|
||||
def generate_extractors():
|
||||
"""Dynamically generate Extractor classes for Mastodon instances"""
|
||||
|
||||
symtable = globals()
|
||||
extractors = config.get(("extractor",), "mastodon")
|
||||
if extractors:
|
||||
util.combine_dict(EXTRACTORS, extractors)
|
||||
config.set(("extractor",), "mastodon", EXTRACTORS)
|
||||
|
||||
for instance, info in EXTRACTORS.items():
|
||||
|
||||
if not isinstance(info, dict):
|
||||
continue
|
||||
|
||||
category = info.get("category") or instance.replace(".", "")
|
||||
root = info.get("root") or "https://" + instance
|
||||
name = (info.get("name") or category).capitalize()
|
||||
token = info.get("access-token")
|
||||
pattern = info.get("pattern") or re.escape(instance)
|
||||
|
||||
class Extr(MastodonUserExtractor):
|
||||
pass
|
||||
|
||||
Extr.__name__ = Extr.__qualname__ = name + "UserExtractor"
|
||||
Extr.__doc__ = "Extractor for all images of a user on " + instance
|
||||
Extr.category = category
|
||||
Extr.instance = instance
|
||||
Extr.pattern = (r"(?:https?://)?" + pattern +
|
||||
r"/@([^/?#]+)(?:/media)?/?$")
|
||||
Extr.test = info.get("test-user")
|
||||
Extr.root = root
|
||||
Extr.access_token = token
|
||||
symtable[Extr.__name__] = Extr
|
||||
|
||||
class Extr(MastodonStatusExtractor):
|
||||
pass
|
||||
|
||||
Extr.__name__ = Extr.__qualname__ = name + "StatusExtractor"
|
||||
Extr.__doc__ = "Extractor for images from a status on " + instance
|
||||
Extr.category = category
|
||||
Extr.instance = instance
|
||||
Extr.pattern = r"(?:https?://)?" + pattern + r"/@[^/?#]+/(\d+)"
|
||||
Extr.test = info.get("test-status")
|
||||
Extr.root = root
|
||||
Extr.access_token = token
|
||||
symtable[Extr.__name__] = Extr
|
||||
|
||||
|
||||
EXTRACTORS = {
|
||||
"mastodon.social": {
|
||||
"category" : "mastodon.social",
|
||||
"access-token" : "Y06R36SMvuXXN5_wiPKFAEFiQaMSQg0o_hGgc86Jj48",
|
||||
"client-id" : "dBSHdpsnOUZgxOnjKSQrWEPakO3ctM7HmsyoOd4FcRo",
|
||||
"client-secret": "DdrODTHs_XoeOsNVXnILTMabtdpWrWOAtrmw91wU1zI",
|
||||
"test-user" : ("https://mastodon.social/@jk", {
|
||||
"pattern": r"https://files.mastodon.social/media_attachments"
|
||||
r"/files/(\d+/){3,}original/\w+",
|
||||
"range": "1-60",
|
||||
"count": 60,
|
||||
}),
|
||||
"test-status" : ("https://mastodon.social/@jk/103794036899778366", {
|
||||
"count": 4,
|
||||
}),
|
||||
},
|
||||
"pawoo.net": {
|
||||
"category" : "pawoo",
|
||||
"access-token" : "c12c9d275050bce0dc92169a28db09d7"
|
||||
"0d62d0a75a8525953098c167eacd3668",
|
||||
"client-id" : "978a25f843ec01e53d09be2c290cd75c"
|
||||
"782bc3b7fdbd7ea4164b9f3c3780c8ff",
|
||||
"client-secret": "9208e3d4a7997032cf4f1b0e12e5df38"
|
||||
"8428ef1fadb446dcfeb4f5ed6872d97b",
|
||||
},
|
||||
"baraag.net": {
|
||||
"category" : "baraag",
|
||||
"access-token" : "53P1Mdigf4EJMH-RmeFOOSM9gdSDztmrAYFgabOKKE0",
|
||||
"client-id" : "czxx2qilLElYHQ_sm-lO8yXuGwOHxLX9RYYaD0-nq1o",
|
||||
"client-secret": "haMaFdMBgK_-BIxufakmI2gFgkYjqmgXGEO2tB-R2xY",
|
||||
},
|
||||
}
|
||||
|
||||
|
||||
generate_extractors()
|
||||
|
Loading…
x
Reference in New Issue
Block a user