From 2a9be485119607d21592fcb22ca13585dbddc2b3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mike=20F=C3=A4hrmann?= Date: Sat, 25 Jan 2020 22:57:08 +0100 Subject: [PATCH] improve util.load/save_cookiestxt() and add tests - take a file object as argument instead of a filename - accept whitespace before comments (" # comment") - map expiration "0" to None and not the number 0 --- gallery_dl/extractor/common.py | 6 ++- gallery_dl/util.py | 95 ++++++++++++++++---------------- test/test_util.py | 98 +++++++++++++++++++++++++++++++++- 3 files changed, 148 insertions(+), 51 deletions(-) diff --git a/gallery_dl/extractor/common.py b/gallery_dl/extractor/common.py index 20132b1f..80a336de 100644 --- a/gallery_dl/extractor/common.py +++ b/gallery_dl/extractor/common.py @@ -197,7 +197,8 @@ class Extractor(): elif isinstance(cookies, str): cookiefile = util.expand_path(cookies) try: - cookies = util.load_cookiestxt(cookiefile) + with open(cookiefile) as fp: + cookies = util.load_cookiestxt(fp) except Exception as exc: self.log.warning("cookies: %s", exc) else: @@ -217,7 +218,8 @@ class Extractor(): """Store the session's cookiejar in a cookies.txt file""" if self._cookiefile and self.config("cookies-update", True): try: - util.save_cookiestxt(self._cookiefile, self._cookiejar) + with open(self._cookiefile, "w") as fp: + util.save_cookiestxt(fp, self._cookiejar) except OSError as exc: self.log.warning("cookies: %s", exc) diff --git a/gallery_dl/util.py b/gallery_dl/util.py index 6b247214..cfa4b070 100644 --- a/gallery_dl/util.py +++ b/gallery_dl/util.py @@ -136,66 +136,65 @@ def remove_directory(path): pass -def load_cookiestxt(path): +def load_cookiestxt(fp): """Parse a Netscape cookies.txt file and return a list of its Cookies""" cookies = [] - with open(path) as fp: - for line in fp: + for line in fp: - # strip '#HttpOnly_' and trailing '\n' - if line.startswith("#HttpOnly_"): - line = line[10:] - if line[-1] == "\n": - line = line[:-1] + line = line.lstrip() + # strip
'#HttpOnly_' + if line.startswith("#HttpOnly_"): + line = line[10:] + # ignore empty lines and comments + if not line or line[0] in ("#", "$"): + continue + # strip trailing '\n' + if line[-1] == "\n": + line = line[:-1] - # ignore empty lines and comments - if not line or line[0] in ("#", "$"): - continue + domain, domain_specified, path, secure, expires, name, value = \ + line.split("\t") + if not name: + name = value + value = None - domain, domain_specified, path, secure, expires, name, value = \ - line.split("\t") - if not name: - name = value - value = None - - cookies.append(Cookie( - 0, name, value, - None, False, - domain, - domain_specified == "TRUE", - domain.startswith("."), - path, False, - secure == "TRUE", - expires or None, - False, None, None, {}, - )) + cookies.append(Cookie( + 0, name, value, + None, False, + domain, + domain_specified == "TRUE", + domain.startswith("."), + path, False, + secure == "TRUE", + None if expires == "0" or not expires else expires, + False, None, None, {}, + )) return cookies -def save_cookiestxt(path, cookies): - """Store 'cookies' in Netscape cookies.txt format""" - with open(path, "w") as fp: - fp.write("# Netscape HTTP Cookie File\n\n") +def save_cookiestxt(fp, cookies): + """Write 'cookies' in Netscape cookies.txt format to 'fp'""" + fp.write("# Netscape HTTP Cookie File\n\n") - for cookie in cookies: - if cookie.value is None: - name = "" - value = cookie.name - else: - name = cookie.name - value = cookie.value + for cookie in cookies: + if cookie.value is None: + name = "" + value = cookie.name + else: + name = cookie.name + value = cookie.value - fp.write("\t".join(( - cookie.domain, - "TRUE" if cookie.domain.startswith(".") else "FALSE", - cookie.path, - "TRUE" if cookie.secure else "FALSE", - "0" if cookie.expires is None else str(cookie.expires), - name, - value, - )) + "\n") + fp.write("\t".join(( + cookie.domain, + "TRUE" if cookie.domain.startswith(".") else "FALSE", + cookie.path, + "TRUE" if 
cookie.secure else "FALSE", + "0" if cookie.expires is None else str(cookie.expires), + name, + value, + )) + "\n") def code_to_language(code, default=None): diff --git a/test/test_util.py b/test/test_util.py index 5a103cf3..f38c157d 100644 --- a/test/test_util.py +++ b/test/test_util.py @@ -1,7 +1,7 @@ #!/usr/bin/env python3 # -*- coding: utf-8 -*- -# Copyright 2015-2019 Mike Fährmann +# Copyright 2015-2020 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as @@ -9,8 +9,10 @@ import unittest import sys +import io import random import string +import http.cookiejar from gallery_dl import util, text, exception @@ -158,6 +160,100 @@ class TestISO639_1(unittest.TestCase): self.assertEqual(func(*args), result) +class TestCookiesTxt(unittest.TestCase): + + def test_load_cookiestxt(self): + + def _assert(content, expected): + cookies = util.load_cookiestxt(io.StringIO(content, None)) + for c, e in zip(cookies, expected): + self.assertEqual(c.__dict__, e.__dict__) + + _assert("", []) + _assert("\n\n\n", []) + _assert("$ Comment", []) + _assert("# Comment", []) + _assert(" # Comment \n\n $ Comment ", []) + _assert( + ".example.org\tTRUE\t/\tTRUE\t0\tname\tvalue", + [self._cookie("name", "value", ".example.org")], + ) + _assert( + ".example.org\tTRUE\t/\tTRUE\t\tname\t", + [self._cookie("name", "", ".example.org")], + ) + _assert( + "# Netscape HTTP Cookie File\n" + "\n" + "# default\n" + ".example.org TRUE / FALSE 0 n1 v1\n" + ".example.org TRUE / TRUE 2145945600 n2 v2\n" + ".example.org TRUE /path FALSE 0 n3\n" + "\n" + " # # extra # # \n" + "www.example.org FALSE / FALSE n4 \n" + "www.example.org FALSE /path FALSE 100 n5 v5\n", + [ + self._cookie( + "n1", "v1", ".example.org", True, "/", False), + self._cookie( + "n2", "v2", ".example.org", True, "/", True, 2145945600), + self._cookie( + "n3", None, ".example.org", True, "/path", False), + self._cookie( + "n4", "" , 
"www.example.org", False, "/", False), + self._cookie( + "n5", "v5", "www.example.org", False, "/path", False, 100), + ], + ) + + with self.assertRaises(ValueError): + util.load_cookiestxt("example.org\tTRUE\t/\tTRUE\t0\tname") + + def test_save_cookiestxt(self): + + def _assert(cookies, expected): + fp = io.StringIO(newline=None) + util.save_cookiestxt(fp, cookies) + self.assertMultiLineEqual(fp.getvalue(), expected) + + _assert([], "# Netscape HTTP Cookie File\n\n") + _assert( + [self._cookie("name", "value", ".example.org")], + "# Netscape HTTP Cookie File\n\n" + ".example.org\tTRUE\t/\tTRUE\t0\tname\tvalue\n", + ) + _assert( + [ + self._cookie( + "n1", "v1", ".example.org", True, "/", False), + self._cookie( + "n2", "v2", ".example.org", True, "/", True, 2145945600), + self._cookie( + "n3", None, ".example.org", True, "/path", False), + self._cookie( + "n4", "" , "www.example.org", False, "/", False), + self._cookie( + "n5", "v5", "www.example.org", False, "/path", False, 100), + ], + "# Netscape HTTP Cookie File\n" + "\n" + ".example.org TRUE / FALSE 0 n1 v1\n" + ".example.org TRUE / TRUE 2145945600 n2 v2\n" + ".example.org TRUE /path FALSE 0 n3\n" + "www.example.org FALSE / FALSE 0 n4 \n" + "www.example.org FALSE /path FALSE 100 n5 v5\n", + ) + + def _cookie(self, name, value, domain, domain_specified=True, + path="/", secure=True, expires=None): + return http.cookiejar.Cookie( + 0, name, value, None, False, + domain, domain_specified, domain.startswith("."), + path, False, secure, expires, False, None, None, {}, + ) + + class TestFormatter(unittest.TestCase): kwdict = {