Merge remote-tracking branch 'yan12125/download-dash-segments' (#5886)

master
Jaime Marquínez Ferrándiz 2015-07-20 19:34:24 +02:00
commit 2ee8f5d80f
3 changed files with 94 additions and 1 deletions

View File

@ -8,6 +8,7 @@ from .hls import NativeHlsFD
from .http import HttpFD from .http import HttpFD
from .rtsp import RtspFD from .rtsp import RtspFD
from .rtmp import RtmpFD from .rtmp import RtmpFD
from .dash import DashSegmentsFD
from ..utils import ( from ..utils import (
determine_protocol, determine_protocol,
@ -20,6 +21,7 @@ PROTOCOL_MAP = {
'mms': RtspFD, 'mms': RtspFD,
'rtsp': RtspFD, 'rtsp': RtspFD,
'f4m': F4mFD, 'f4m': F4mFD,
'http_dash_segments': DashSegmentsFD,
} }

View File

@ -0,0 +1,66 @@
from __future__ import unicode_literals
import re
from .common import FileDownloader
from ..compat import compat_urllib_request
class DashSegmentsFD(FileDownloader):
"""
Download segments in a DASH manifest
"""
def real_download(self, filename, info_dict):
self.report_destination(filename)
tmpfilename = self.temp_name(filename)
base_url = info_dict['url']
segment_urls = info_dict['segment_urls']
is_test = self.params.get('test', False)
remaining_bytes = self._TEST_FILE_SIZE if is_test else None
byte_counter = 0
def append_url_to_file(outf, target_url, target_name, remaining_bytes=None):
self.to_screen('[DashSegments] %s: Downloading %s' % (info_dict['id'], target_name))
req = compat_urllib_request.Request(target_url)
if remaining_bytes is not None:
req.add_header('Range', 'bytes=0-%d' % (remaining_bytes - 1))
data = self.ydl.urlopen(req).read()
if remaining_bytes is not None:
data = data[:remaining_bytes]
outf.write(data)
return len(data)
def combine_url(base_url, target_url):
if re.match(r'^https?://', target_url):
return target_url
return '%s/%s' % (base_url, target_url)
with open(tmpfilename, 'wb') as outf:
append_url_to_file(
outf, combine_url(base_url, info_dict['initialization_url']),
'initialization segment')
for i, segment_url in enumerate(segment_urls):
segment_len = append_url_to_file(
outf, combine_url(base_url, segment_url),
'segment %d / %d' % (i + 1, len(segment_urls)),
remaining_bytes)
byte_counter += segment_len
if remaining_bytes is not None:
remaining_bytes -= segment_len
if remaining_bytes <= 0:
break
self.try_rename(tmpfilename, filename)
self._hook_progress({
'downloaded_bytes': byte_counter,
'total_bytes': byte_counter,
'filename': filename,
'status': 'finished',
})
return True

View File

@ -535,7 +535,25 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'uploader': 'dorappi2000', 'uploader': 'dorappi2000',
'formats': 'mincount:33', 'formats': 'mincount:33',
}, },
},
# DASH manifest with segment_list
{
'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',
'md5': '8ce563a1d667b599d21064e982ab9e31',
'info_dict': {
'id': 'CsmdDsKjzN8',
'ext': 'mp4',
'upload_date': '20150510',
'uploader': 'Airtek',
'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',
'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',
'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',
},
'params': {
'youtube_include_dash_manifest': True,
'format': '135', # bestvideo
} }
},
] ]
def __init__(self, *args, **kwargs): def __init__(self, *args, **kwargs):
@ -826,6 +844,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
# TODO implement WebVTT downloading # TODO implement WebVTT downloading
pass pass
elif mime_type.startswith('audio/') or mime_type.startswith('video/'): elif mime_type.startswith('audio/') or mime_type.startswith('video/'):
segment_list = r.find('{urn:mpeg:DASH:schema:MPD:2011}SegmentList')
format_id = r.attrib['id'] format_id = r.attrib['id']
video_url = url_el.text video_url = url_el.text
filesize = int_or_none(url_el.attrib.get('{http://youtube.com/yt/2012/10/10}contentLength')) filesize = int_or_none(url_el.attrib.get('{http://youtube.com/yt/2012/10/10}contentLength'))
@ -839,6 +858,12 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'filesize': filesize, 'filesize': filesize,
'fps': int_or_none(r.attrib.get('frameRate')), 'fps': int_or_none(r.attrib.get('frameRate')),
} }
if segment_list is not None:
f.update({
'initialization_url': segment_list.find('{urn:mpeg:DASH:schema:MPD:2011}Initialization').attrib['sourceURL'],
'segment_urls': [segment.attrib.get('media') for segment in segment_list.findall('{urn:mpeg:DASH:schema:MPD:2011}SegmentURL')],
'protocol': 'http_dash_segments',
})
try: try:
existing_format = next( existing_format = next(
fo for fo in formats fo for fo in formats