[plurk] add delay between comment requests

This commit is contained in:
Mike Fährmann 2019-12-01 01:03:31 +01:00
parent a28552fd19
commit 8759403f37
No known key found for this signature in database
GPG Key ID: 5680CA389D365A88

View File

@@ -11,6 +11,7 @@
from .common import Extractor, Message from .common import Extractor, Message
from .. import text, extractor, exception from .. import text, extractor, exception
import datetime import datetime
import time
import json import json
import re import re
@@ -47,14 +48,21 @@ class PlurkExtractor(Extractor):
"""Return an iterable with a 'plurk's comments""" """Return an iterable with a 'plurk's comments"""
url = "https://www.plurk.com/Responses/get" url = "https://www.plurk.com/Responses/get"
data = {"plurk_id": plurk["id"], "count": "200"} data = {"plurk_id": plurk["id"], "count": "200"}
headers = {
"Origin": self.root,
"Referer": self.root,
"X-Requested-With": "XMLHttpRequest",
}
while True: while True:
info = self.request(url, method="POST", data=data).json() info = self.request(
url, method="POST", headers=headers, data=data).json()
yield from info["responses"] yield from info["responses"]
if not info["has_newer"]: if not info["has_newer"]:
return return
elif info["has_newer"] < 200: elif info["has_newer"] < 200:
del data["count"] del data["count"]
time.sleep(1)
data["from_response_id"] = info["responses"][-1]["id"] + 1 data["from_response_id"] = info["responses"][-1]["id"] + 1
@staticmethod @staticmethod
@@ -83,9 +91,9 @@ class PlurkTimelineExtractor(PlurkExtractor):
user_id, pos = text.extract(page, '"user_id":', ',') user_id, pos = text.extract(page, '"user_id":', ',')
plurks = self._load(text.extract(page, "_PLURKS = ", ";\n", pos)[0]) plurks = self._load(text.extract(page, "_PLURKS = ", ";\n", pos)[0])
url = "https://www.plurk.com/TimeLine/getPlurks"
data = {"user_id": user_id.strip()}
headers = {"Referer": url, "X-Requested-With": "XMLHttpRequest"} headers = {"Referer": url, "X-Requested-With": "XMLHttpRequest"}
data = {"user_id": user_id.strip()}
url = "https://www.plurk.com/TimeLine/getPlurks"
while plurks: while plurks:
yield from plurks yield from plurks