Revert "Preliminary 0.0.7 changes"

This reverts commit 8cc97905a7.
2017-01-04 14:08:56 -05:00 · 2017-01-04 14:08:56 -05:00 · d8ce58e66d
parent 8cc97905a7
commit d8ce58e66d
11 changed files with 194 additions and 183 deletions
--- a/.gitignore
+++ b/.gitignore
@ -38,4 +38,3 @@ nosetests.xml
 .pydevproject
 *.iml
 *.xml
-bandcamp_dl/asyncdownloader.py
--- a/CHANGELOG.rst
+++ b/CHANGELOG.rst
@ -17,9 +17,3 @@ Version 0.0.6
 - [Enhancement] Individual track downloads work now.
 - [Bugfix] Fixed imports, now working when installed via pip.
 - [Note] Last version to officially support Python 2.7.x
-
-Version 0.0.7
-------------
- [Dependency] Slimit is no longer required
- [Dependency] Ply is no longer required
- [Dependency] demjson is now required
--- a/README.rst
+++ b/README.rst
@ -24,7 +24,7 @@ Description
 ===========

 bandcamp-dl is a small command-line app to download audio from
-BandCamp.com. It requires the Python interpreter, version 3.5.x and is
+BandCamp.com. It requires the Python interpreter, version 2.7.x - 3.5.x and is
 not platform specific. It is released to the public domain, which means
 you can modify it, redistribute it or use it how ever you like.

@ -209,11 +209,14 @@ related to bandcamp-dl, by all means, go ahead and report the bug.
 Dependencies
 ============

-  `BeautifulSoup <https://pypi.python.org/pypi/beautifulsoup4>`_ - HTML Parsing
-  `Demjson <https://pypi.python.org/pypi/demjson>`_- JavaScript dict to JSON conversion
+-  `BeautifulSoup <https://pypi.python.org/pypi/beautifulsoup4>`_ -
+   HTML Parsing
 -  `Mutagen <https://pypi.python.org/pypi/mutagen>`_ - ID3 Encoding
-  `Requests <https://pypi.python.org/pypi/requests>`_ - for retriving the HTML
-  `Unicode-Slugify <https://pypi.python.org/pypi/unicode-slugify>`_ - A slug generator that turns strings into unicode slugs.
+-  `Requests <https://pypi.python.org/pypi/requests>`_ - for retrieving
+   the HTML
+-  `Slimit <https://pypi.python.org/pypi/slimit>`_ - Javascript parsing
+-  `Unicode-Slugify <https://pypi.python.org/pypi/unicode-slugify>`_ -
+   A slug generator that turns strings into unicode slugs.

 Copyright
 =========
--- a/bandcamp_dl/bandcamp.py
+++ b/bandcamp_dl/bandcamp.py
@ -1,120 +1,119 @@
-from .bandcampjson import BandcampJSON
 from bs4 import BeautifulSoup
-from bs4 import FeatureNotFound
 import requests
-import json
+from .jsobj import read_js_object


 class Bandcamp:
-    def parse(self, url: str, art: bool=True) -> dict or None:
-        """
-        Requests the page, cherry picks album info
-
-        :param url: album/track url
-        :param art: if True download album art
-        :return: album metadata
-        """
+    def parse(self, url, no_art=True):
        try:
            r = requests.get(url)
        except requests.exceptions.MissingSchema:
            return None

+        self.no_art = no_art
+
+        if r.status_code is not 200:
+            return None
+
        try:
            self.soup = BeautifulSoup(r.text, "lxml")
-        except FeatureNotFound:
+        except:
            self.soup = BeautifulSoup(r.text, "html.parser")

-        self.generate_album_json()
-        self.tracks = self.tralbum_data_json['trackinfo']
-
        album = {
            "tracks": [],
-            "title": self.embed_data_json['album_title'],
-            "artist": self.embed_data_json['artist'],
+            "title": "",
+            "artist": "",
            "full": False,
            "art": "",
-            "date": self.tralbum_data_json['album_release_date']
+            "date": ""
        }

-        for track in self.tracks:
-            track = self.get_track_metadata(track)
+        album_meta = self.extract_album_meta_data(r)
+
+        album['artist'] = album_meta['artist']
+        album['title'] = album_meta['title']
+        album['date'] = album_meta['date']
+
+        for track in album_meta['tracks']:
+            track = self.get_track_meta_data(track)
            album['tracks'].append(track)

-        album['full'] = self.all_tracks_available()
-        if art:
+        album['full'] = self.all_tracks_available(album)
+        if self.no_art:
            album['art'] = self.get_album_art()

        return album

-    def all_tracks_available(self) -> bool:
-        """
-        Verify that all tracks have a url
-
-        :return: True if all urls accounted for
-        """
-        for track in self.tracks:
-            if track['file']['mp3-128'] is None:
+    def all_tracks_available(self, album):
+        for track in album['tracks']:
+            if track['url'] is None:
                return False
+
        return True

-    @staticmethod
-    def get_track_metadata(track: dict) -> dict:
-        """
-        Extract individual track metadata
+    def is_basestring(self, obj):
+        if isinstance(obj, str) or isinstance(obj, bytes) or isinstance(obj, bytearray):
+            return True
+        return False

-        :param track: track dict
-        :return: track metadata dict
-        """
-        track_metadata = {
-            "duration": track['duration'],
-            "track": str(track['track_num']),
-            "title": track['title'],
-            "url": None
-        }
+    def get_track_meta_data(self, track):
+        new_track = {}
+        if not self.is_basestring(track['file']):
+            if 'mp3-128' in track['file']:
+                new_track['url'] = track['file']['mp3-128']
+        else:
+            new_track['url'] = None

-        if 'mp3-128' in track['file']:
-            track_metadata['url'] = "http:" + track['file']['mp3-128']
-        return track_metadata
+        new_track['duration'] = track['duration']
+        new_track['track'] = track['track_num']
+        new_track['title'] = track['title']

-    def generate_album_json(self):
-        """
-        Retrieve JavaScript dictionaries from page and generate JSON
+        return new_track

-        :return: True if successful
-        """
-        try:
-            embed = BandcampJSON(self.soup, "EmbedData")
-            tralbum = BandcampJSON(self.soup, "TralbumData")
+    def extract_album_meta_data(self, request):
+        album = {}

-            embed_data = embed.js_to_json()
-            tralbum_data = tralbum.js_to_json()
+        embedData = self.get_embed_string_block(request)

-            self.embed_data_json = json.loads(embed_data)
-            self.tralbum_data_json = json.loads(tralbum_data)
-        except Exception as e:
-            print(e)
-            return None
-        return True
+        block = request.text.split("var TralbumData = ")
+
+        stringBlock = block[1]
+
+        stringBlock = stringBlock.split("};")[0] + "};"
+        stringBlock = read_js_object(u"var TralbumData = {}".format(stringBlock))
+
+        if 'album_title' not in embedData['EmbedData']:
+            album['title'] = "Unknown Album"
+        else:
+            album['title'] = embedData['EmbedData']['album_title']
+
+        album['artist'] = stringBlock['TralbumData']['artist']
+        album['tracks'] = stringBlock['TralbumData']['trackinfo']
+
+        if stringBlock['TralbumData']['album_release_date'] == "null":
+            album['date'] = ""
+        else:
+            album['date'] = stringBlock['TralbumData']['album_release_date'].split()[2]
+
+        return album

    @staticmethod
-    def generate_album_url(artist: str, album: str) -> str:
-        """
-        Generate an album url based on the artist and album name
-
-        :param artist: artist name
-        :param album: album name
-        :return: album url as str
-        """
+    def generate_album_url(artist, album):
        return "http://{0}.bandcamp.com/album/{1}".format(artist, album)

-    def get_album_art(self) -> str:
-        """
-        Find and retrieve album art url from page
-
-        :return: url as str
-        """
+    def get_album_art(self):
        try:
            url = self.soup.find(id='tralbumArt').find_all('img')[0]['src']
            return url
-        except None:
+        except:
            pass
+
+    def get_embed_string_block(self, request):
+        embedBlock = request.text.split("var EmbedData = ")
+
+        embedStringBlock = embedBlock[1]
+        embedStringBlock = embedStringBlock.split("};")[0] + "};"
+        embedStringBlock = read_js_object(u"var EmbedData = {}".format(embedStringBlock))
+
+        return embedStringBlock
--- a/bandcamp_dl/bandcamp_dl.py
+++ b/bandcamp_dl/bandcamp_dl.py
@ -49,7 +49,7 @@ from .bandcampdownloader import BandcampDownloader


 def main():
-    arguments = docopt(__doc__, version='bandcamp-dl 0.0.7')
+    arguments = docopt(__doc__, version='bandcamp-dl 0.0.6-01')
    bandcamp = Bandcamp()

    if arguments['--artist'] and arguments['--album']:
@ -73,4 +73,3 @@ def main():

 if __name__ == '__main__':
    main()
-
--- a/bandcamp_dl/bandcampdownloader.py
+++ b/bandcamp_dl/bandcampdownloader.py
@ -9,14 +9,6 @@ from slugify import slugify

 class BandcampDownloader:
    def __init__(self, urls=None, template=None, directory=None, overwrite=False):
-        """
-        Initialization function
-
-        :param urls: list of urls
-        :param template: filename template
-        :param directory: download location
-        :param overwrite: if True overwrite existing files
-        """
        if type(urls) is str:
            self.urls = [urls]

@ -25,22 +17,11 @@ class BandcampDownloader:
        self.directory = directory
        self.overwrite = overwrite

-    def start(self, album: dict):
-        """
-        Start album download process
-
-        :param album: album dict
-        """
+    def start(self, album):
        print("Starting download process.")
        self.download_album(album)

-    def template_to_path(self, track: dict) -> str:
-        """
-        Create valid filepath based on track metadata
-
-        :param track: track metadata
-        :return: filepath
-        """
+    def template_to_path(self, track):
        path = self.template
        path = path.replace("%{artist}", slugify(track['artist']))
        path = path.replace("%{album}", slugify(track['album']))
@ -50,27 +31,14 @@ class BandcampDownloader:

        return path

-    @staticmethod
-    def create_directory(filename: str) -> str:
-        """
-        Create directory based on filename if it doesn't exist
-
-        :param filename: full filename
-        :return: directory path
-        """
+    def create_directory(self, filename):
        directory = os.path.dirname(filename)
        if not os.path.exists(directory):
            os.makedirs(directory)

        return directory

-    def download_album(self, album: dict) -> bool:
-        """
-        Download all MP3 files in the album
-
-        :param album: album dict
-        :return: True if successful
-        """
+    def download_album(self, album):
        for track_index, track in enumerate(album['tracks']):
            track_meta = {
                "artist": album['artist'],
@ -85,17 +53,30 @@ class BandcampDownloader:
            filename = self.template_to_path(track_meta)
            dirname = self.create_directory(filename)

-            if not track['url']:
+            if not track.get('url'):
                print("Skipping track {0} - {1} as it is not available"
                      .format(track['track'], track['title']))
                continue

            try:
                track_url = track['url']
+                # Check and see if HTTP is in the track_url
+                if 'http' not in track_url:
+                    track_url = 'http:{}'.format(track_url)

                r = requests.get(track_url, stream=True)
                file_length = r.headers.get('content-length')

+                if not self.overwrite and os.path.isfile(filename):
+                    file_size = os.path.getsize(filename) - 128
+                    if int(file_size) != int(file_length):
+                        print(filename + " is incomplete, redownloading.")
+                        os.remove(filename)
+                    else:
+                        print("Skipping track {0} - {1} as it's already downloaded, use --overwrite to overwrite existing files"
+                            .format(track['track'], track['title']))
+                        continue
+
                with open(filename, "wb") as f:
                    print("Downloading: " + filename[:-4])
                    if file_length is None:
@ -125,14 +106,7 @@ class BandcampDownloader:

        return True

-    @staticmethod
-    def write_id3_tags(filename: str, meta: dict):
-        """
-        Write metadata to the MP3 file
-
-        :param filename: name of mp3 file
-        :param meta: dict of track metadata
-        """
+    def write_id3_tags(self, filename, meta):
        print("\nEncoding . . .")

        audio = MP3(filename)
--- a/bandcamp_dl/bandcampjson.py
+++ b/bandcamp_dl/bandcampjson.py
@ -1,42 +0,0 @@
-import demjson
-import re
-
-
-class BandcampJSON:
-    def __init__(self, body, var_name: str, js_data=None):
-        self.body = body
-        self.var_name = var_name
-        self.js_data = js_data
-
-    def get_js(self) -> str:
-        """
-        Get <script> element containing the data we need and return the raw JS
-
-        :return js_data: Raw JS as str
-        """
-        self.js_data = self.body.find("script", {"src": False}, text=re.compile(self.var_name)).string
-        return self.js_data
-
-    def extract_data(self, js: str) -> str:
-        """
-        Extract values from JS dictionary
-
-        :param js: Raw JS
-        :return: Contents of dictionary as str
-        """
-        self.js_data = re.search(r"(?<=var\s" + self.var_name + "\s=\s)[^;]*", js).group().replace('" + "', '')
-        return self.js_data
-
-    def js_to_json(self) -> str:
-        """
-        Convert JavaScript dictionary to JSON
-
-        :return: JSON as str
-        """
-        js = self.get_js()
-        data = self.extract_data(js)
-        # Decode with demjson first to reformat keys and lists
-        js_data = demjson.decode(data)
-        # Encode to make valid JSON
-        js_data = demjson.encode(js_data)
-        return js_data
--- a/bandcamp_dl/deps.txt
+++ b/bandcamp_dl/deps.txt
@ -1,6 +1,7 @@
 beautifulsoup4==4.5.1
-demjson==2.2.4
 docopt==0.6.2
 mutagen==1.35.1
+ply==3.9
 requests==2.12.4
+slimit==0.8.1
 unicode-slugify==0.1.3
--- a/bandcamp_dl/jsobj.py
+++ b/bandcamp_dl/jsobj.py
@ -0,0 +1,81 @@
+"""
+Simple JavaScript/ECMAScript object literal reader
+    Only supports object literals wrapped in `var x = ...;` statements, so you
+      might want to do read_js_object('var x = %s;' % literal) if it's in another format.
+
+    Requires the slimit <https://github.com/rspivak/slimit> library for parsing.
+
+    Basic constant folding on strings and numbers is done, e.g. "hi " + "there!" reduces to "hi there!",
+    and 1+1 reduces to 2.
+
+    Copyright (c) 2013 darkf
+    Licensed under the terms of the WTFPL:
+
+        DO WHAT THE FUCK YOU WANT TO PUBLIC LICENSE
+                        Version 2, December 2004
+
+        Everyone is permitted to copy and distribute verbatim or modified
+        copies of this license document, and changing it is allowed as long
+        as the name is changed.
+
+                   DO WHAT THE FUCK YOU WANT TO PUBLIC LICENSE
+          TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
+
+          0. You just DO WHAT THE FUCK YOU WANT TO.
+"""
+
+from slimit.parser import Parser
+import slimit.ast as ast
+
+
+def read_js_object(code):
+    parser = Parser()
+
+    def visit(node):
+        if isinstance(node, ast.Program):
+            d = {}
+            for child in node:
+                if not isinstance(child, ast.VarStatement):
+                    raise ValueError("All statements should be var statements")
+                key, val = visit(child)
+                d[key] = val
+            return d
+        elif isinstance(node, ast.VarStatement):
+            return visit(node.children()[0])
+        elif isinstance(node, ast.VarDecl):
+            return visit(node.identifier), visit(node.initializer)
+        elif isinstance(node, ast.Object):
+            d = {}
+            for property in node:
+                key = visit(property.left)
+                value = visit(property.right)
+                d[key] = value
+            return d
+        elif isinstance(node, ast.BinOp):
+            # simple constant folding
+            if node.op == '+':
+                if isinstance(node.left, ast.String) and isinstance(node.right, ast.String):
+                    return visit(node.left) + visit(node.right)
+                elif isinstance(node.left, ast.Number) and isinstance(node.right, ast.Number):
+                    return visit(node.left) + visit(node.right)
+                else:
+                    raise ValueError("Cannot + on anything other than two literals")
+            else:
+                raise ValueError("Cannot do operator '{}'".format(node.op))
+
+        elif isinstance(node, ast.String):
+            return node.value.strip('"').strip("'")
+        elif isinstance(node, ast.Array):
+            return [visit(x) for x in node]
+        elif isinstance(node, ast.Number) or isinstance(node, ast.Identifier)\
+                or isinstance(node, ast.Boolean) or isinstance(node, ast.Null):
+            return node.value
+        else:
+            raise Exception("Unhandled node: {}".format(node))
+
+    return visit(parser.parse(code))
+
+
+if __name__ == "__main__":
+    print(read_js_object("""var foo = {x: 10, y: "hi " + "there!"};
+                            var bar = {derp: ["herp", "it", "up", "forever"]};"""))
--- a/requirements.txt
+++ b/requirements.txt
@ -1,8 +1,9 @@
 --index-url https://pypi.python.org/simple/

 beautifulsoup4==4.5.1
-demjson==2.2.4
 docopt==0.6.2
 mutagen==1.35.1
+ply==3.9
 requests==2.12.4
-unicode-slugify==0.1.3
+slimit==0.8.1
+unicode-slugify==0.1.3
--- a/setup.py
+++ b/setup.py
@ -6,7 +6,7 @@ here = path.abspath(path.dirname(__file__))

 setup(
    name='bandcamp-downloader',
-    version='0.0.7',
+    version='0.0.6-01',
    description='bandcamp-dl downloads albums and tracks from Bandcamp for you',
    long_description=open('README.rst').read(),
    url='https://github.com/iheanyi/bandcamp-dl',
@ -18,16 +18,18 @@ setup(
        'Intended Audience :: End Users/Desktop',
        'Topic :: Multimedia :: Sound/Audio',
        'License :: Public Domain',
+        'Programming Language :: Python :: 2.7',
        'Programming Language :: Python :: 3.5',
    ],
    keywords=['bandcamp', 'downloader', 'music', 'cli', 'albums', 'dl'],
    packages=find_packages(),
    install_requires=[
        'beautifulsoup4',
-        'demjson',
        'docopt',
        'mutagen',
+        'ply',
        'requests',
+        'slimit',
        'unicode-slugify',
    ],
    entry_points={