implement youtube-dl downloader module
URLs starting with 'ytdl:' will now be handled by youtube-dl. There is probably a lot to fix and improve, but the basic use case works.

TODO:
- format selection and ytdl options in general
- better filename/path handling
- ytdl support for "unsupported URLs"
- ...
This commit is contained in:
parent
f4df6c2396
commit
188876d814
59
gallery_dl/downloader/ytdl.py
Normal file
59
gallery_dl/downloader/ytdl.py
Normal file
@ -0,0 +1,59 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
# Copyright 2018 Mike Fährmann
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License version 2 as
|
||||
# published by the Free Software Foundation.
|
||||
|
||||
"""Downloader module for URLs requiring youtube-dl support"""
|
||||
|
||||
from .common import DownloaderBase
|
||||
from youtube_dl import YoutubeDL
|
||||
import logging
|
||||
import os
|
||||
|
||||
|
||||
class Downloader(DownloaderBase):
    """Downloader for 'ytdl:' URLs; the actual work is done by youtube-dl"""
    scheme = "ytdl"

    def __init__(self, session, output):
        DownloaderBase.__init__(self, session, output)
        # one shared YoutubeDL instance, reporting through the "ytdl" logger
        self.ytdl = YoutubeDL({
            "logger": logging.getLogger("ytdl"),
        })

    def download(self, url, pathfmt):
        """Download the video or playlist behind 'url'

        'url' is expected to carry a leading "ytdl:" prefix, which is
        removed before the remainder is passed on to youtube-dl.
        Returns True on success and False on any failure.
        """
        try:
            # url[5:] drops the 5-character "ytdl:" prefix
            info_dict = self.ytdl.extract_info(url[5:], download=False)
        except Exception:
            return False

        # guard against an empty result, for which the membership test
        # below would raise instead of reporting a failed download
        if not info_dict:
            return False

        if "entries" in info_dict:
            return self._download_playlist(pathfmt, info_dict)
        return self._download_video(pathfmt, info_dict)

    def _download_video(self, pathfmt, info_dict):
        """Download the single video described by 'info_dict'

        Returns True if the target already exists or was downloaded
        successfully and False if youtube-dl raised an exception.
        """
        pathfmt.set_extension(info_dict["ext"])
        if pathfmt.exists():
            # an empty temppath marks the file as 'nothing was downloaded'
            pathfmt.temppath = ""
            return True
        if self.partdir:
            pathfmt.temppath = os.path.join(
                self.partdir, pathfmt.filename)
        # The output template is %-formatted by youtube-dl,
        # so a literal "%" in the path has to be written as "%%".
        self.ytdl.params["outtmpl"] = pathfmt.temppath.replace("%", "%%")

        self.out.start(pathfmt.path)
        try:
            self.ytdl.process_info(info_dict)
        except Exception:
            return False
        return True

    def _download_playlist(self, pathfmt, info_dict):
        """Download every entry of the playlist described by 'info_dict'

        The playlist index and file extension of each entry are filled
        in by youtube-dl through the output template.
        Returns True on success and False if youtube-dl raised an
        exception, matching the behavior of _download_video().
        """
        pathfmt.set_extension("%(playlist_index)s.%(ext)s")
        self.ytdl.params["outtmpl"] = pathfmt.realpath

        try:
            for entry in info_dict["entries"]:
                self.ytdl.process_info(entry)
        except Exception:
            return False
        return True
|
@ -9,7 +9,7 @@
|
||||
"""Extract images from https://twitter.com/"""
|
||||
|
||||
from .common import Extractor, Message
|
||||
from .. import text, extractor
|
||||
from .. import text
|
||||
|
||||
|
||||
class TwitterExtractor(Extractor):
|
||||
@ -26,9 +26,6 @@ class TwitterExtractor(Extractor):
|
||||
self.retweets = self.config("retweets", True)
|
||||
self.videos = self.config("videos", False)
|
||||
|
||||
if self.videos:
|
||||
self._blacklist = extractor.blacklist(("twitter",))
|
||||
|
||||
def items(self):
|
||||
yield Message.Version, 1
|
||||
yield Message.Directory, self.metadata()
|
||||
@ -45,10 +42,10 @@ class TwitterExtractor(Extractor):
|
||||
yield Message.Url, url + ":orig", data
|
||||
|
||||
if self.videos and "-videoContainer" in tweet:
|
||||
url = "{}/{}/status/{}".format(
|
||||
data["num"] = 1
|
||||
url = "ytdl:{}/{}/status/{}".format(
|
||||
self.root, data["user"], data["tweet_id"])
|
||||
with self._blacklist:
|
||||
yield Message.Queue, url, data
|
||||
yield Message.Url, url, data
|
||||
|
||||
def metadata(self):
|
||||
"""Return general metadata"""
|
||||
|
@ -179,17 +179,17 @@ class DownloadJob(Job):
|
||||
time.sleep(self.sleep)
|
||||
|
||||
# download from URL
|
||||
if not self.get_downloader(url).download(url, self.pathfmt):
|
||||
if not self.download(url):
|
||||
|
||||
# use fallback URLs if available
|
||||
for num, url in enumerate(fallback or (), 1):
|
||||
self.log.info("Trying fallback URL #%d", num)
|
||||
if self.get_downloader(url).download(url, self.pathfmt):
|
||||
if self.download(url):
|
||||
break
|
||||
else:
|
||||
# download failed
|
||||
self.log.error(
|
||||
"Failed to download %s", self.pathfmt.filename)
|
||||
"Failed to download %s", self.pathfmt.filename or url)
|
||||
return
|
||||
|
||||
if not self.pathfmt.temppath:
|
||||
@ -230,17 +230,29 @@ class DownloadJob(Job):
|
||||
for pp in self.postprocessors:
|
||||
pp.finalize()
|
||||
|
||||
def get_downloader(self, url):
|
||||
"""Return, and possibly construct, a downloader suitable for 'url'"""
|
||||
def download(self, url):
    """Download 'url' using the downloader responsible for its scheme

    Returns the result of that downloader's download() call,
    or False if no downloader is available for the scheme.
    """
    # everything before the first ":" selects the downloader
    scheme, _, _ = url.partition(":")
    handler = self.get_downloader(scheme)
    if not handler:
        return False
    return handler.download(url, self.pathfmt)
|
||||
|
||||
def get_downloader(self, scheme):
    """Return a downloader suitable for 'scheme'

    Downloader instances are created on first use and cached in
    self.downloaders. For an unsupported scheme an error is logged
    once and None is cached and returned.
    """
    # "https" URLs are served by the "http" downloader
    if scheme == "https":
        scheme = "http"
    try:
        return self.downloaders[scheme]
    except KeyError:
        pass

    klass = downloader.find(scheme)
    # only instantiate when a downloader class was actually found;
    # calling a missing class unconditionally would raise a TypeError
    if klass:
        instance = klass(self.extractor.session, self.out)
    else:
        instance = None
        self.log.error("'%s:' URLs are not supported", scheme)
    # cache the result (including None) so the lookup happens only once
    self.downloaders[scheme] = instance
    return instance
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user