93 lines
3.1 KiB
Python
93 lines
3.1 KiB
Python
|
# -*- coding: utf-8 -*-
|
||
|
|
||
|
# Copyright 2020 Mike Fährmann
|
||
|
#
|
||
|
# This program is free software; you can redistribute it and/or modify
|
||
|
# it under the terms of the GNU General Public License version 2 as
|
||
|
# published by the Free Software Foundation.
|
||
|
|
||
|
"""Extractors for https://kabe-uchiroom.com/"""
|
||
|
|
||
|
from .common import Extractor, Message
|
||
|
from .. import text, exception
|
||
|
|
||
|
|
||
|
class KabeuchiUserExtractor(Extractor):
|
||
|
"""Extractor for all posts of a user on kabe-uchiroom.com"""
|
||
|
category = "kabeuchi"
|
||
|
subcategory = "user"
|
||
|
directory_fmt = ("{category}", "{twitter_user_id} {twitter_id}")
|
||
|
filename_fmt = "{id}_{num:>02}{title:?_//}.{extension}"
|
||
|
archive_fmt = "{id}_{num}"
|
||
|
root = "https://kabe-uchiroom.com"
|
||
|
pattern = r"(?:https?://)?kabe-uchiroom\.com/mypage/?\?id=(\d+)"
|
||
|
test = (
|
||
|
("https://kabe-uchiroom.com/mypage/?id=919865303848255493", {
|
||
|
"pattern": (r"https://kabe-uchiroom\.com/accounts/upfile/3/"
|
||
|
r"919865303848255493/\w+\.jpe?g"),
|
||
|
"count": ">= 24",
|
||
|
}),
|
||
|
("https://kabe-uchiroom.com/mypage/?id=123456789", {
|
||
|
"exception": exception.NotFoundError,
|
||
|
}),
|
||
|
)
|
||
|
|
||
|
def __init__(self, match):
|
||
|
Extractor.__init__(self, match)
|
||
|
self.user_id = match.group(1)
|
||
|
|
||
|
def items(self):
|
||
|
base = "{}/accounts/upfile/{}/{}/".format(
|
||
|
self.root, self.user_id[-1], self.user_id)
|
||
|
keys = ("image1", "image2", "image3", "image4", "image5", "image6")
|
||
|
|
||
|
for post in self.posts():
|
||
|
if post.get("is_ad") or not post["image1"]:
|
||
|
continue
|
||
|
|
||
|
post["date"] = text.parse_datetime(
|
||
|
post["created_at"], "%Y-%m-%d %H:%M:%S")
|
||
|
yield Message.Directory, post
|
||
|
|
||
|
for key in keys:
|
||
|
name = post[key]
|
||
|
if not name:
|
||
|
break
|
||
|
url = base + name
|
||
|
post["num"] = ord(key[-1]) - 48
|
||
|
yield Message.Url, url, text.nameext_from_url(name, post)
|
||
|
|
||
|
def posts(self):
|
||
|
url = "{}/mypage/?id={}".format(self.root, self.user_id)
|
||
|
response = self.request(url)
|
||
|
if response.history and response.url == self.root + "/":
|
||
|
raise exception.NotFoundError("user")
|
||
|
target_id = text.extract(response.text, 'user_friend_id = "', '"')[0]
|
||
|
return self._pagination(target_id)
|
||
|
|
||
|
def _pagination(self, target_id):
|
||
|
url = "{}/get_posts.php".format(self.root)
|
||
|
data = {
|
||
|
"user_id" : "0",
|
||
|
"target_id" : target_id,
|
||
|
"type" : "uploads",
|
||
|
"sort_type" : "0",
|
||
|
"category_id": "all",
|
||
|
"latest_post": "",
|
||
|
"page_num" : 0,
|
||
|
}
|
||
|
|
||
|
while True:
|
||
|
info = self.request(url, method="POST", data=data).json()
|
||
|
datas = info["datas"]
|
||
|
|
||
|
if not datas or not isinstance(datas, list):
|
||
|
return
|
||
|
yield from datas
|
||
|
|
||
|
last_id = datas[-1]["id"]
|
||
|
if last_id == info["last_data"]:
|
||
|
return
|
||
|
data["latest_post"] = last_id
|
||
|
data["page_num"] += 1
|