gallery-dl/gallery_dl/extractor/flickr.py

# -*- coding: utf-8 -*-

# Copyright 2017 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.

"""Extract images from https://www.flickr.com/"""

from .common import Extractor, Message
from .. import text, exception


class FlickrExtractor(Extractor):
    """Common functionality shared by all flickr extractors"""
    category = "flickr"
    filename_fmt = "{category}_{id}.{extension}"

    def __init__(self, match):
        Extractor.__init__(self)
        # first capture group of the matched URL pattern identifies the item
        self.item_id = match.group(1)
        # fetching full photo metadata is opt-in (disabled by default)
        self.metadata = self.config("metadata", False)
        self.api = FlickrAPI(self)

    @staticmethod
    def _clean(photo):
        """Simplify a 'photos.getInfo' result dict in place

        Drops the 'comments' and 'views' entries, replaces 'title' with
        its '_content' value, reduces 'tags' to a list of raw tag strings
        and unwraps every dict-valued 'location' field to its '_content'.
        """
        for unwanted in ("comments", "views"):
            del photo[unwanted]

        tag_objects = photo["tags"]["tag"]
        photo["title"] = photo["title"]["_content"]
        photo["tags"] = [tag["raw"] for tag in tag_objects]

        if "location" not in photo:
            return

        place = photo["location"]
        # iterate over a snapshot of the keys while replacing values
        for name in list(place):
            entry = place[name]
            if isinstance(entry, dict):
                place[name] = entry["_content"]


class FlickrImageExtractor(FlickrExtractor):
    """Extractor for a single flickr.com photo"""
    subcategory = "image"
    # matches both photo pages and direct staticflickr.com image URLs;
    # the captured group is the numeric photo ID
    pattern = [r"(?:https?://)?(?:www\.)?flickr\.com/photos/[^/]+/(\d+)",
               r"(?:https?://)?[^.]+\.staticflickr\.com/(?:\d+/)+(\d+)_"]
    test = [
        ("https://www.flickr.com/photos/departingyyz/16089302239", {
            "url": "7f0887f5953f61c8b79a695cb102ea309c0346b0",
            "keyword": "5ecdaf0192802451b7daca9b81f393f207ff7ee9",
            "content": "6aaad7512d335ca93286fe2046e7fe3bb93d808e",
        }),
        ("http://c2.staticflickr.com/2/1475/24531000464_9a7503ae68_b.jpg", {
            "url": "40f5163488522ca5d918750ed7bd7fcf437982fe",
        }),
        ("https://www.flickr.com/photos/zzz/16089302238", {
            "exception": exception.NotFoundError,
        }),
    ]

    def items(self):
        """Yield version, directory and URL messages for the photo"""
        photo_id = self.item_id

        # the last entry of the size list is the one that gets downloaded
        chosen = self.api.photos_getSizes(photo_id)[-1]

        if not self.metadata:
            data = {"id": photo_id}
        else:
            data = self.api.photos_getInfo(photo_id)
            self._clean(data)

        source = chosen["source"]
        data["photo"] = chosen
        text.nameext_from_url(source, data)

        yield Message.Version, 1
        yield Message.Directory, data
        yield Message.Url, source, data


class FlickrAlbumExtractor(FlickrExtractor):
    """Extractor for flickr.com photo albums (photosets)"""
    subcategory = "album"
    directory_fmt = ["{category}", "{id} - {title}"]
    # accepts '/albums/<id>' as well as '/sets/<id>' URLs
    pattern = [r"(?:https?://)?(?:www\.)?flickr\.com/"
               r"photos/[^/]+/(?:album|set)s/(\d+)"]
    test = [("https://www.flickr.com/photos/flickr/albums/72157656845052880", {
        "url": "517db3faa55e88686f1d00a379f8f0daf4c7b837",
        "keyword": "504ca926fe520dc6e4a98e7ee590c3498a3c3392",
    })]

    def items(self):
        """Yield a version message, one directory message and all photos"""
        yield Message.Version, 1

        entries = self.api.photosets_getPhotos(self.item_id)
        for index, entry in enumerate(entries):
            if index == 0:
                # album info is attached to every photo; announce the
                # target directory once, using a copy of that info
                yield Message.Directory, entry["photoset"].copy()
            source = entry["photo"]["source"]
            yield Message.Url, source, text.nameext_from_url(source, entry)


class FlickrAPI():
    """Minimal interface for the flickr API

    Uses the HTTP session of the given extractor to send requests to
    the flickr REST endpoint and returns the decoded JSON results.
    """
    api_url = "https://api.flickr.com/services/rest/"
    # (URL-key suffix, label) pairs; the first one present on a photo wins
    formats = [("o", "Original"), ("k", "Large 2048"),
               ("h", "Large 1600"), ("l", "Large")]

    def __init__(self, extractor, api_key="ac4fd7aa98585b9eee1ba761c209de68"):
        self.session = extractor.session
        self.subcategory = extractor.subcategory
        self.api_key = api_key

    def photos_getInfo(self, photo_id):
        """Return the 'photo' object of a 'flickr.photos.getInfo' call"""
        params = {"photo_id": photo_id}
        return self._call("photos.getInfo", params)["photo"]

    def photos_getSizes(self, photo_id):
        """Return the list of size entries for the photo 'photo_id'"""
        params = {"photo_id": photo_id}
        return self._call("photos.getSizes", params)["sizes"]["size"]

    def photosets_getPhotos(self, photoset_id):
        """Yield all photos of a photoset, following its pagination

        Each yielded photo dict gets two additional entries:
          'photo'    - dict describing the selected image ('source', ...)
          'photoset' - info about the photoset page the photo came from,
                       without its photo list and paging bookkeeping
        """
        method = "photosets.getPhotos"
        params = {"photoset_id": photoset_id, "page": 1,
                  "extras": "url_o,url_k,url_h,url_l"}
        while True:
            photoset = self._call(method, params)["photoset"]

            photos = photoset.pop("photo")
            # paging bookkeeping is of no use as metadata; tolerate
            # responses that lack some of these keys
            for name in ("page", "perpage", "per_page"):
                photoset.pop(name, None)

            for photo in photos:

                for fmt, fmtname in self.formats:
                    key = "url_" + fmt
                    if key in photo:
                        # generate photo info
                        photo["photo"] = {
                            "source": photo[key],
                            "width" : photo["width_" + fmt],
                            "height": photo["height_" + fmt],
                            "label" : fmtname,
                            "media" : "photo",
                        }
                        # remove excess data
                        excess = [
                            name for name in photo
                            if name.startswith(("url_", "width_", "height_"))
                        ]
                        for name in excess:
                            del photo[name]
                        break

                else:
                    # none of the requested formats is available:
                    # extra API call to get photo url and size
                    photo["photo"] = self.photos_getSizes(photo["id"])[-1]

                photo["photoset"] = photoset
                yield photo

            # '>=' instead of '==' so that an empty photoset (zero pages)
            # or a string-typed page count cannot cause an endless loop
            if params["page"] >= int(photoset["pages"]):
                break
            params["page"] += 1

    def _call(self, method, params):
        """Call the API method 'method' and return the decoded response

        'params' is extended in place with the method name, API key and
        output format settings before the request is sent.
        Raises NotFoundError if the response carries error code 1.
        """
        params["method"] = "flickr." + method
        params["api_key"] = self.api_key
        params["format"] = "json"
        params["nojsoncallback"] = "1"
        data = self.session.get(self.api_url, params=params).json()
        if "code" in data and data["code"] == 1:
            raise exception.NotFoundError(self.subcategory)
        return data
[flickr] add image extractor 2017-05-30 17:43:02 +02:00			`# -- coding: utf-8 --`

			`# Copyright 2017 Mike Fährmann`
			`#`
			`# This program is free software; you can redistribute it and/or modify`
			`# it under the terms of the GNU General Public License version 2 as`
			`# published by the Free Software Foundation.`

			`"""Extract images from https://www.flickr.com/"""`

			`from .common import Extractor, Message`
			`from .. import text, exception`


			`class FlickrExtractor(Extractor):`
			`"""Base class for flickr extractors"""`
			`category = "flickr"`
[flickr] add album extractor 2017-05-31 17:31:51 +02:00			`filename_fmt = "{category}_{id}.{extension}"`

			`def __init__(self, match):`
			`Extractor.__init__(self)`
			`self.api = FlickrAPI(self)`
			`self.item_id = match.group(1)`
			`self.metadata = self.config("metadata", False)`
[flickr] add image extractor 2017-05-30 17:43:02 +02:00
			`@staticmethod`
			`def _clean(photo):`
			`del photo["comments"]`
			`del photo["views"]`

			`photo["title"] = photo["title"]["_content"]`
			`photo["tags"] = [t["raw"] for t in photo["tags"]["tag"]]`

			`if "location" in photo:`
			`location = photo["location"]`
			`for key, value in location.items():`
			`if isinstance(value, dict):`
			`location[key] = value["_content"]`


			`class FlickrImageExtractor(FlickrExtractor):`
			`"""Extractor for individual images from flickr.com"""`
			`subcategory = "image"`
			`pattern = [r"(?:https?://)?(?:www\.)?flickr\.com/photos/[^/]+/(\d+)",`
[flickr] add album extractor 2017-05-31 17:31:51 +02:00			`r"(?:https?://)?[^.]+\.staticflickr\.com/(?:\d+/)+(\d+)_"]`
[flickr] add image extractor 2017-05-30 17:43:02 +02:00			`test = [`
			`("https://www.flickr.com/photos/departingyyz/16089302239", {`
			`"url": "7f0887f5953f61c8b79a695cb102ea309c0346b0",`
			`"keyword": "5ecdaf0192802451b7daca9b81f393f207ff7ee9",`
			`"content": "6aaad7512d335ca93286fe2046e7fe3bb93d808e",`
			`}),`
[flickr] add album extractor 2017-05-31 17:31:51 +02:00			`("http://c2.staticflickr.com/2/1475/24531000464_9a7503ae68_b.jpg", {`
			`"url": "40f5163488522ca5d918750ed7bd7fcf437982fe",`
			`}),`
[flickr] add image extractor 2017-05-30 17:43:02 +02:00			`("https://www.flickr.com/photos/zzz/16089302238", {`
			`"exception": exception.NotFoundError,`
[flickr] add album extractor 2017-05-31 17:31:51 +02:00			`}),`
[flickr] add image extractor 2017-05-30 17:43:02 +02:00			`]`

			`def items(self):`
[flickr] add album extractor 2017-05-31 17:31:51 +02:00			`size = self.api.photos_getSizes(self.item_id)[-1]`
[flickr] add image extractor 2017-05-30 17:43:02 +02:00
			`if self.metadata:`
[flickr] add album extractor 2017-05-31 17:31:51 +02:00			`info = self.api.photos_getInfo(self.item_id)`
[flickr] add image extractor 2017-05-30 17:43:02 +02:00			`self._clean(info)`
			`else:`
[flickr] add album extractor 2017-05-31 17:31:51 +02:00			`info = {"id": self.item_id}`
[flickr] add image extractor 2017-05-30 17:43:02 +02:00
			`info["photo"] = size`
			`url = size["source"]`
			`text.nameext_from_url(url, info)`

			`yield Message.Version, 1`
			`yield Message.Directory, info`
			`yield Message.Url, url, info`


[flickr] add album extractor 2017-05-31 17:31:51 +02:00			`class FlickrAlbumExtractor(FlickrExtractor):`
			`"""Extractor for photo albums from flickr.com"""`
			`subcategory = "album"`
			`directory_fmt = ["{category}", "{id} - {title}"]`
			`pattern = [r"(?:https?://)?(?:www\.)?flickr\.com/"`
			`r"photos/[^/]+/(?:album\|set)s/(\d+)"]`
			`test = [("https://www.flickr.com/photos/flickr/albums/72157656845052880", {`
			`"url": "517db3faa55e88686f1d00a379f8f0daf4c7b837",`
			`"keyword": "504ca926fe520dc6e4a98e7ee590c3498a3c3392",`
			`})]`

			`def items(self):`
			`first = True`
			`yield Message.Version, 1`

			`for photo in self.api.photosets_getPhotos(self.item_id):`
			`if first:`
			`first = False`
			`yield Message.Directory, photo["photoset"].copy()`
			`url = photo["photo"]["source"]`
			`yield Message.Url, url, text.nameext_from_url(url, photo)`


[flickr] add image extractor 2017-05-30 17:43:02 +02:00			`class FlickrAPI():`
[flickr] add album extractor 2017-05-31 17:31:51 +02:00			`"""Minimal interface for the flickr API"""`
[flickr] add image extractor 2017-05-30 17:43:02 +02:00			`api_url = "https://api.flickr.com/services/rest/"`
[flickr] add album extractor 2017-05-31 17:31:51 +02:00			`formats = [("o", "Original"), ("k", "Large 2048"),`
			`("h", "Large 1600"), ("l", "Large")]`
[flickr] add image extractor 2017-05-30 17:43:02 +02:00
			`def __init__(self, extractor, api_key="ac4fd7aa98585b9eee1ba761c209de68"):`
			`self.session = extractor.session`
			`self.subcategory = extractor.subcategory`
			`self.api_key = api_key`

			`def photos_getInfo(self, photo_id):`
			`params = {"photo_id": photo_id}`
			`return self._call("photos.getInfo", params)["photo"]`

			`def photos_getSizes(self, photo_id):`
			`params = {"photo_id": photo_id}`
[flickr] add album extractor 2017-05-31 17:31:51 +02:00			`return self._call("photos.getSizes", params)["sizes"]["size"]`

			`def photosets_getPhotos(self, photoset_id):`
			`method = "photosets.getPhotos"`
			`params = {"photoset_id": photoset_id, "page": 1,`
			`"extras": "url_o,url_k,url_h,url_l"}`
			`while True:`
			`photoset = self._call(method, params)["photoset"]`

			`photos = photoset["photo"]`
			`del photoset["photo"]`
			`del photoset["page"]`
			`del photoset["perpage"]`
			`del photoset["per_page"]`

			`for photo in photos:`

			`for fmt, fmtname in self.formats:`
			`key = "url_" + fmt`
			`if key in photo:`
			`# generate photo info`
			`photo["photo"] = {`
			`"source": photo[key],`
			`"width" : photo["width_" + fmt],`
			`"height": photo["height_" + fmt],`
			`"label" : fmtname,`
			`"media" : "photo",`
			`}`
			`# remove excess data`
			`keys = [`
			`key for key in photo.keys()`
			`if key.startswith(("url_", "width_", "height_"))`
			`]`
			`for key in keys:`
			`del photo[key]`
			`break`

			`else:`
			`# extra API call to get photo url and size`
			`print(photo["id"])`
			`photo["photo"] = self.photos_getSizes(photo["id"])[-1]`

			`photo["photoset"] = photoset`
			`yield photo`

			`if params["page"] == photoset["pages"]:`
			`break`
			`params["page"] += 1`
[flickr] add image extractor 2017-05-30 17:43:02 +02:00
			`def _call(self, method, params):`
			`params["method"] = "flickr." + method`
			`params["api_key"] = self.api_key`
			`params["format"] = "json"`
			`params["nojsoncallback"] = "1"`
			`data = self.session.get(self.api_url, params=params).json()`
			`if "code" in data and data["code"] == 1:`
			`raise exception.NotFoundError(self.subcategory)`
			`return data`