[reddit] add ability to load more comments (#15)
The 'extractor.reddit.morecomments' option enables the use of the '/api/morechildren' API endpoint (1) to load even more comments than the usual submission-request provides. Possible values are the booleans 'true' and 'false' (default). Note: this feature comes at the cost of 1 extra API call towards the rate limit for every 100 extra comments. (1) https://www.reddit.com/dev/api/#GET_api_morechildren
This commit is contained in:
parent
05ed95e5b0
commit
56bec79e6a
@ -108,6 +108,7 @@ class RedditAPI():
|
||||
def __init__(self, extractor):
|
||||
self.extractor = extractor
|
||||
self.comments = extractor.config("comments", 500)
|
||||
self.morecomments = extractor.config("morecomments", False)
|
||||
self.refresh_token = extractor.config("refresh-token")
|
||||
self.log = extractor.log
|
||||
self.session = extractor.session
|
||||
@ -116,9 +117,10 @@ class RedditAPI():
|
||||
def submission(self, submission_id):
|
||||
"""Fetch the (submission, comments)=-tuple for a submission id"""
|
||||
endpoint = "/comments/" + submission_id + "/.json"
|
||||
link_id = "t3_" + submission_id if self.morecomments else None
|
||||
submission, comments = self._call(endpoint, {"limit": self.comments})
|
||||
return (submission["data"]["children"][0]["data"],
|
||||
self._unfold(comments))
|
||||
self._flatten(comments, link_id))
|
||||
|
||||
def submissions_subreddit(self, subreddit, params):
|
||||
"""Collect all (submission, comments)-tuples of a subreddit"""
|
||||
@ -126,6 +128,24 @@ class RedditAPI():
|
||||
params["limit"] = 100
|
||||
return self._pagination(endpoint, params)
|
||||
|
||||
def morechildren(self, link_id, children):
|
||||
"""Load additional comments from a submission"""
|
||||
endpoint = "/api/morechildren"
|
||||
params = {"link_id": link_id, "api_type": "json"}
|
||||
index, done = 0, False
|
||||
while not done:
|
||||
if len(children) - index < 100:
|
||||
done = True
|
||||
params["children"] = ",".join(children[index:index + 100])
|
||||
index += 100
|
||||
|
||||
data = self._call(endpoint, params)["json"]
|
||||
for thing in data["data"]["things"]:
|
||||
if thing["kind"] == "more":
|
||||
children.extend(thing["data"]["children"])
|
||||
else:
|
||||
yield thing["data"]
|
||||
|
||||
def authenticate(self):
|
||||
"""Authenticate the application by requesting an access token"""
|
||||
access_token = self._authenticate_impl(self.refresh_token)
|
||||
@ -190,15 +210,18 @@ class RedditAPI():
|
||||
return
|
||||
params["after"] = data["after"]
|
||||
|
||||
@staticmethod
|
||||
def _unfold(comments):
|
||||
# TODO: order?
|
||||
def _flatten(self, comments, link_id=None):
|
||||
extra = []
|
||||
queue = comments["data"]["children"]
|
||||
while queue:
|
||||
comment = queue.pop()
|
||||
comment = queue.pop(0)
|
||||
if comment["kind"] == "more":
|
||||
if link_id:
|
||||
extra.extend(comment["data"]["children"])
|
||||
continue
|
||||
comment = comment["data"]
|
||||
yield comment
|
||||
if comment["replies"]:
|
||||
queue += comment["replies"]["data"]["children"]
|
||||
if link_id and extra:
|
||||
yield from self.morechildren(link_id, extra)
|
||||
|
Loading…
x
Reference in New Issue
Block a user