[instagram] refactor reel handling

This commit is contained in:
Mike Fährmann 2021-03-04 20:30:28 +01:00
parent 9785c551bc
commit 524ebb133e
No known key found for this signature in database
GPG Key ID: 5680CA389D365A88

View File

@ -46,10 +46,10 @@ class InstagramExtractor(Extractor):
for post in self.posts():
if post["__typename"] == "GraphReel":
post = self._parse_reel(post["id"])
if "__typename" in post:
post = self._parse_post_graphql(post)
else:
post = self._parse_post(post)
post = self._parse_post_reel(post)
post.update(data)
files = post.pop("_files")
@ -85,8 +85,8 @@ class InstagramExtractor(Extractor):
return response
def _api_request(self, endpoint, params):
url = "https://i.instagram.com/api/" + endpoint
def _request_api(self, endpoint, params=None):
url = "https://i.instagram.com/api" + endpoint
headers = {
"X-CSRFToken" : self.csrf_token,
"X-IG-App-ID" : "936619743392459",
@ -99,7 +99,7 @@ class InstagramExtractor(Extractor):
url, params=params, headers=headers, cookies=cookies,
).json()
def _graphql_request(self, query_hash, variables):
def _request_graphql(self, query_hash, variables):
url = self.root + "/graphql/query/"
params = {
"query_hash": query_hash,
@ -162,7 +162,7 @@ class InstagramExtractor(Extractor):
for key in ("sessionid", "mid", "csrftoken")
}
def _parse_post(self, post):
def _parse_post_graphql(self, post):
if post.get("is_video") and "video_url" not in post:
url = "{}/tv/{}/".format(self.root, post["shortcode"])
post = self._extract_post_page(url)
@ -230,18 +230,12 @@ class InstagramExtractor(Extractor):
return data
def _parse_reel(self, reel_id):
params = {"reel_ids": reel_id}
data = self._api_request("v1/feed/reels_media/", params)
if not data["reels_media"]:
raise exception.NotFoundError("reel")
reel = data["reels_media"][0]
reel_id = reel_id.rpartition(":")[2]
owner = reel["user"]
def _parse_post_reel(self, post):
reel_id = str(post["id"]).rpartition(":")[2]
owner = post["user"]
data = {
"expires" : text.parse_timestamp(reel.get("expiring_at")),
"expires" : text.parse_timestamp(post.get("expiring_at")),
"owner_id" : owner["pk"],
"username" : owner.get("username"),
"fullname" : owner.get("full_name"),
@ -250,7 +244,7 @@ class InstagramExtractor(Extractor):
}
data["_files"] = files = []
for num, item in enumerate(reel["items"], 1):
for num, item in enumerate(post["items"], 1):
image = item["image_versions2"]["candidates"][0]
@ -337,7 +331,7 @@ class InstagramExtractor(Extractor):
}
return user[key]
def _pagination(self, query_hash, variables, data):
def _pagination_graphql(self, query_hash, variables, data):
while True:
for edge in data["edges"]:
yield edge["node"]
@ -352,9 +346,19 @@ class InstagramExtractor(Extractor):
variables["after"] = self._cursor = info["end_cursor"]
self.log.debug("Cursor: %s", self._cursor)
data = next(iter(self._graphql_request(
data = next(iter(self._request_graphql(
query_hash, variables)["user"].values()))
def _pagination_api(self, endpoint, params):
while True:
data = self._request_api(endpoint, params)
yield from data["items"]
info = data["paging_info"]
if not info["more_available"]:
return
params["max_id"] = info["max_id"]
class InstagramUserExtractor(InstagramExtractor):
"""Extractor for an Instagram user profile"""
@ -392,7 +396,7 @@ class InstagramPostsExtractor(InstagramExtractor):
query_hash = "003056d32c2554def87228bc3fd9668a"
variables = {"id": user["id"], "first": 50}
edge = self._get_edge_data(user, "edge_owner_to_timeline_media")
return self._pagination(query_hash, variables, edge)
return self._pagination_graphql(query_hash, variables, edge)
class InstagramChannelExtractor(InstagramExtractor):
@ -411,7 +415,7 @@ class InstagramChannelExtractor(InstagramExtractor):
query_hash = "bc78b344a68ed16dd5d7f264681c4c76"
variables = {"id": user["id"], "first": 50}
edge = self._get_edge_data(user, "edge_felix_video_timeline")
return self._pagination(query_hash, variables, edge)
return self._pagination_graphql(query_hash, variables, edge)
class InstagramSavedExtractor(InstagramExtractor):
@ -427,7 +431,7 @@ class InstagramSavedExtractor(InstagramExtractor):
query_hash = "2ce1d673055b99250e93b6f88f878fde"
variables = {"id": user["id"], "first": 50}
edge = self._get_edge_data(user, "edge_saved_media")
return self._pagination(query_hash, variables, edge)
return self._pagination_graphql(query_hash, variables, edge)
class InstagramTagExtractor(InstagramExtractor):
@ -451,9 +455,9 @@ class InstagramTagExtractor(InstagramExtractor):
query_hash = "9b498c08113f1e09617a1703c22b2f32"
variables = {"tag_name": hashtag["name"], "first": 50}
edge = self._get_edge_data(hashtag, "edge_hashtag_to_media")
return self._pagination(query_hash, variables, edge)
return self._pagination_graphql(query_hash, variables, edge)
def _pagination(self, query_hash, variables, data):
def _pagination_graphql(self, query_hash, variables, data):
while True:
for edge in data["edges"]:
yield edge["node"]
@ -464,7 +468,7 @@ class InstagramTagExtractor(InstagramExtractor):
variables["after"] = self._cursor = info["end_cursor"]
self.log.debug("Cursor: %s", self._cursor)
data = self._graphql_request(
data = self._request_graphql(
query_hash, variables)["hashtag"]["edge_hashtag_to_media"]
@ -575,7 +579,7 @@ class InstagramPostExtractor(InstagramExtractor):
)
def posts(self):
query_hash = "a9441f24ac73000fa17fe6e6da11d59d"
query_hash = "2c4c2e343a8f64c625ba02b2aa12c7f8"
variables = {
"shortcode" : self.item,
"child_comment_count" : 3,
@ -583,7 +587,7 @@ class InstagramPostExtractor(InstagramExtractor):
"parent_comment_count" : 24,
"has_threaded_comments": True
}
data = self._graphql_request(query_hash, variables)
data = self._request_graphql(query_hash, variables)
media = data.get("shortcode_media")
if not media:
raise exception.NotFoundError("post")
@ -619,7 +623,9 @@ class InstagramStoriesExtractor(InstagramExtractor):
return ()
reel_id = user["id"]
return ({"__typename": "GraphReel", "id": reel_id},)
endpoint = "/v1/feed/reels_media/"
params = {"reel_ids": reel_id}
return self._request_api(endpoint, params)["reels"].values()
class InstagramHighlightsExtractor(InstagramExtractor):
@ -642,12 +648,13 @@ class InstagramHighlightsExtractor(InstagramExtractor):
"include_highlight_reels": True,
"include_live_status": True,
}
data = self._graphql_request(query_hash, variables)
data = self._request_graphql(query_hash, variables)
edges = data["user"]["edge_highlight_reels"]["edges"]
if not edges:
return ()
return [
{
"__typename": "GraphReel",
"id" : "highlight:" + edge["node"]["id"],
}
for edge in data["user"]["edge_highlight_reels"]["edges"]
]
reel_ids = ["highlight:" + edge["node"]["id"] for edge in edges]
endpoint = "/v1/feed/reels_media/?reel_ids=" + \
"&reel_ids=".join(text.quote(rid) for rid in reel_ids)
reels = self._request_api(endpoint)["reels"]
return [reels[rid] for rid in reel_ids]