[pinterest] add extractors for related pins

Related pins can now be accessed by adding a "#related" fragment
to the end of a Pinterest URL, for example:
- https://www.pinterest.com/pin/858146903966145189/#related
- https://www.pinterest.com/g1952849/test-/#related

There are no explicit real URLs for related pins;
using an option to enable them results in "clunky" code,
and a custom "related:<URL>" scheme doesn't feel right either.
This commit is contained in:
Mike Fährmann 2018-08-15 21:28:27 +02:00
parent 1694039de0
commit 63fa0b2006
No known key found for this signature in database
GPG Key ID: 5680CA389D365A88
3 changed files with 82 additions and 20 deletions

View File

@ -57,7 +57,7 @@ nijie https://nijie.info/ |Images from Use-3|
Nyafuu Archive https://archive.nyafuu.org/ Threads
Pawoo https://pawoo.net Images from Users, Images from Statuses
Piczel https://piczel.tv/ Images from Users, Folders, individual Images
Pinterest https://www.pinterest.com Boards, Pins, pin.it Links
Pinterest https://www.pinterest.com Boards, Pins, pin.it Links, related Pins
Pixiv https://www.pixiv.net/ |Images from Use-4| Required
PowerManga https://powermanga.org/ Chapters, Manga
Read Comic Online http://readcomiconline.to/ Comic-Issues, Comics

View File

@ -13,6 +13,9 @@ from .. import text, exception
import json
BASE_PATTERN = r"(?:https?://)?(?:\w+\.)?pinterest\.\w+"
class PinterestExtractor(Extractor):
"""Base class for pinterest extractors"""
category = "pinterest"
@ -23,6 +26,16 @@ class PinterestExtractor(Extractor):
Extractor.__init__(self)
self.api = PinterestAPI(self)
def items(self):
    """Generate the message stream shared by the pinterest extractors.

    Emits a version message, then a directory message carrying the
    result of ``self.metadata()``, followed by one URL message for
    each pin produced by ``self.pins()``.
    """
    shared = self.metadata()

    yield Message.Version, 1
    yield Message.Directory, shared

    for pin_obj in self.pins():
        image_url, pin_info = self.data_from_pin(pin_obj)
        # Shared metadata overrides same-named keys of the pin data.
        pin_info.update(shared)
        yield Message.Url, image_url, pin_info
def data_from_pin(self, pin):
"""Get image url and metadata from a pin-object"""
img = pin["images"]["orig"]
@ -35,7 +48,7 @@ class PinterestExtractor(Extractor):
class PinterestPinExtractor(PinterestExtractor):
"""Extractor for images from a single pin from pinterest.com"""
subcategory = "pin"
pattern = [r"(?:https?://)?(?:[^./]+\.)?pinterest\.[^/]+/pin/([^/?#&]+)"]
pattern = [BASE_PATTERN + r"/pin/([^/?#&]+)/?$"]
test = [
("https://www.pinterest.com/pin/858146903966145189/", {
"url": "afb3c26719e3a530bb0e871c480882a801a4e8a5",
@ -51,13 +64,14 @@ class PinterestPinExtractor(PinterestExtractor):
def __init__(self, match):
PinterestExtractor.__init__(self)
self.pin_id = match.group(1)
self.pin = None
def items(self):
pin = self.api.pin(self.pin_id)
url, data = self.data_from_pin(pin)
yield Message.Version, 1
yield Message.Directory, data
yield Message.Url, url, data
def metadata(self):
self.pin = self.api.pin(self.pin_id)
return self.data_from_pin(self.pin)[1]
def pins(self):
return (self.pin,)
class PinterestBoardExtractor(PinterestExtractor):
@ -65,8 +79,7 @@ class PinterestBoardExtractor(PinterestExtractor):
subcategory = "board"
directory_fmt = ["{category}", "{board[owner][username]}", "{board[name]}"]
archive_fmt = "{board[id]}_{id}"
pattern = [r"(?:https?://)?(?:[^./]+\.)?pinterest\.[^/]+/"
r"(?!pin/)([^/?#&]+)/([^/?#&]+)"]
pattern = [BASE_PATTERN + r"/(?!pin/)([^/?#&]+)/([^/?#&]+)/?$"]
test = [
("https://www.pinterest.com/g1952849/test-/", {
"url": "85911dfca313f3f7f48c2aa0bc684f539d1d80a6",
@ -80,16 +93,52 @@ class PinterestBoardExtractor(PinterestExtractor):
PinterestExtractor.__init__(self)
self.user = text.unquote(match.group(1))
self.board = text.unquote(match.group(2))
self.board_id = 0
def items(self):
def metadata(self):
board = self.api.board(self.user, self.board)
data = {"board": board, "count": board["pin_count"]}
yield Message.Version, 1
yield Message.Directory, data
for pin in self.api.board_pins(board["id"]):
url, pin_data = self.data_from_pin(pin)
pin_data.update(data)
yield Message.Url, url, pin_data
self.board_id = board["id"]
return {"board": board}
def pins(self):
return self.api.board_pins(self.board_id)
class PinterestRelatedPinExtractor(PinterestPinExtractor):
    """Extractor for the pins related to a single pinterest.com pin.

    Matches pin URLs that end in a ``#related`` fragment.
    """
    subcategory = "related-pin"
    directory_fmt = ["{category}", "related {original_pin[id]}"]
    pattern = [BASE_PATTERN + r"/pin/([^/?#&]+)/?#related$"]
    test = [
        (
            "https://www.pinterest.com/pin/858146903966145189/#related",
            {"range": (1, 50), "count": 50},
        ),
    ]

    def metadata(self):
        """Return the source pin's data under the 'original_pin' key."""
        source_pin = self.api.pin(self.pin_id)
        extracted = self.data_from_pin(source_pin)
        # Index 1 is the metadata part of data_from_pin()'s result.
        return {"original_pin": extracted[1]}

    def pins(self):
        """Return the related-pin feed for ``self.pin_id``."""
        related_feed = self.api.pin_related(self.pin_id)
        return related_feed
class PinterestRelatedBoardExtractor(PinterestBoardExtractor):
    """Extractor for the pins related to a pinterest.com board.

    Matches board URLs that end in a ``#related`` fragment.
    """
    subcategory = "related-board"
    directory_fmt = [
        "{category}",
        "{board[owner][username]}",
        "{board[name]}",
        "related",
    ]
    pattern = [BASE_PATTERN + r"/(?!pin/)([^/?#&]+)/([^/?#&]+)/?#related$"]
    test = [
        (
            "https://www.pinterest.com/g1952849/test-/#related",
            {"range": (1, 50), "count": 50},
        ),
    ]

    def pins(self):
        """Return the related-pin feed for ``self.board_id``."""
        related_feed = self.api.board_related(self.board_id)
        return related_feed
class PinterestPinitExtractor(PinterestExtractor):
@ -145,6 +194,11 @@ class PinterestAPI():
options = {"id": pin_id, "field_set_key": "detailed"}
return self._call("Pin", options)["resource_response"]["data"]
def pin_related(self, pin_id):
    """Return the paginated 'RelatedPinFeed' results for ``pin_id``."""
    request_options = dict(pin=pin_id, add_vase=True, pins_only=True)
    return self._pagination("RelatedPinFeed", request_options)
def board(self, user, board):
"""Query information about a board"""
options = {"slug": board, "username": user,
@ -156,6 +210,11 @@ class PinterestAPI():
options = {"board_id": board_id}
return self._pagination("BoardFeed", options)
def board_related(self, board_id):
    """Return the paginated 'BoardRelatedPixieFeed' results for ``board_id``."""
    request_options = dict(board_id=board_id, add_vase=True)
    return self._pagination("BoardRelatedPixieFeed", request_options)
def _call(self, resource, options):
url = "{}/resource/{}Resource/get/".format(self.BASE_URL, resource)
params = {"data": json.dumps({"options": options}), "source_url": ""}
@ -172,7 +231,8 @@ class PinterestAPI():
return data
if response.status_code == 404 or response.history:
raise exception.NotFoundError(self.extractor.subcategory)
resource = self.extractor.subcategory.rpartition("-")[2]
raise exception.NotFoundError(resource)
self.extractor.log.error("API request failed")
self.extractor.log.debug("%s", response.text)
raise exception.StopExtraction()

View File

@ -76,6 +76,8 @@ SUBCATEGORY_MAP = {
"tag" : "Tag-Searches",
"user" : "Images from Users",
"work" : "Individual Images",
"related-pin" : "related Pins",
"related-board": "",
}
AUTH_MAP = {
@ -238,7 +240,7 @@ columns = [
for extrlist in extractors
], 35),
RstColumn("Capabilities", [
", ".join(extr.subcat for extr in extrlist)
", ".join(extr.subcat for extr in extrlist if extr.subcat)
for extrlist in extractors
], 50),
RstColumn("Authentication", [