Basic Code
Code that handles one episode URL at a time.
commit 31d331a50a
.gitignore (vendored, Normal file, 5 lines)
@@ -0,0 +1,5 @@
# Default ignored files
/workspace.xml
src/.idea/*
src/dist/*
*.pyc
src/__init__.py (Normal file, 2 lines)
@@ -0,0 +1,2 @@
# #!/usr/bin/env python
# # -*- coding: utf-8 -*-
src/__main__.py (Normal file, 14 lines)
@@ -0,0 +1,14 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-

import os
import sys
sys.path.append("..")
from movies import *
# from hulusubs_dl.__version__ import __version__
# from hulusubs_dl.cust_utils import *
# from hulusubs_dl.hulu_subs_dl import HuluSubsDl

if __name__ == "__main__":
    Movies(sys.argv[1:], os.getcwd())
    sys.exit()
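Illustrative usage (not part of the commit): the entry point forwards argv to Movies, though as committed Movies ignores it in favor of the URL hard-coded in movies.py. A minimal sketch, run from inside src/, with a placeholder URL:

import os
from movies import Movies

# Equivalent to: python __main__.py <episode-url>
# Placeholder URL; as committed, Movies.__init__ overrides it with its own
# hard-coded test URL before the input() prompt can run.
Movies(['https://new-movies123.co/tv-series/<series>/<id>/watch-online-for-free.html'], os.getcwd())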
src/__version__.py (Normal file, 4 lines)
@@ -0,0 +1,4 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-

__version__ = "2021.01.12"
src/cust_utils/__init__.py (Normal file, 6 lines)
@@ -0,0 +1,6 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-

from . import path_util
from . import utils
from . import browser_instance
src/cust_utils/browser_instance.py (Normal file, 92 lines)
@@ -0,0 +1,92 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import requests
import json
import random  # was "from random import random", which breaks random.choice() below
import logging


def get_user_agent():
    user_agent = 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko)'
    user_agent = user_agent + ' Chrome/56.0.2924.87 Safari/537.36'
    return user_agent


def get_request(url, text_only=False, **kwargs):
    _proxy = kwargs.get("proxy")
    _rand_proxy = None
    headers = {
        'User-Agent': get_user_agent(),
        'Accept-Encoding': 'gzip, deflate',
    }
    if kwargs.get('xml_http_request', False):
        headers['X-Requested-With'] = 'XMLHttpRequest'

    # Pick one proxy at random from the caller-supplied list, if any.
    if _proxy and len(_proxy) > 0:
        try:
            _rand_proxy = random.choice(_proxy)
        except IndexError as error:
            print("Proxy Failed : {0}".format(error))
            print("Continuing Without Proxy.")
            _rand_proxy = None

    proxy = {
        "http": _rand_proxy,
        "https": _rand_proxy
    }

    logging.debug('GET url: {0}'.format(url))
    logging.debug('GET proxy: {0}'.format(proxy))

    sess = requests.session()
    connection = sess.get(url, headers=headers, proxies=proxy)

    if connection.status_code != 200:
        print("Whoops! Seems like I can't connect to the website.")
        print("It's showing : %s" % connection)
        print("Run this script with the --verbose argument and report the issue along with the log file on GitHub.")
        print("Can't connect to website %s" % url)
        return None
    else:
        if text_only:
            return connection.content
        return json.loads(connection.text.encode("utf-8"))


def post_request(url, data, cookie_value, **kwargs):
    _proxy = kwargs.get("proxy")
    _rand_proxy = None
    if not cookie_value:
        raise Warning("No Cookie Value Provided. Exiting")
    headers = {
        'User-Agent': get_user_agent(),
        'Accept-Encoding': 'gzip, deflate, br',
        'Accept': '*/*',
        'Content-Type': 'application/json',
        'Cookie': cookie_value
    }
    # Same optional random proxy selection as get_request.
    if _proxy and len(_proxy) > 0:
        try:
            _rand_proxy = random.choice(_proxy)
        except IndexError as error:
            print("Proxy Failed : {0}".format(error))
            print("Continuing Without Proxy.")
            _rand_proxy = None

    proxy = {
        "http": _rand_proxy,
        "https": _rand_proxy
    }
    logging.debug('POST url: {0}'.format(url))
    logging.debug('POST proxy: {0}'.format(proxy))
    sess = requests.session()
    connection = sess.post(url, data=data, headers=headers, proxies=proxy)

    if connection.status_code != 200:
        print("Whoops! Seems like I can't connect to the website.")
        print("It's showing : %s" % connection)
        print("Run this script with the --verbose argument and report the issue along with the log file on GitHub.")
        print("Can't connect to website %s" % url)
        return None
    else:
        return json.loads(connection.text.encode("utf-8"))
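A quick usage sketch for get_request (illustrative, not part of the commit); the proxy keyword takes a list of proxy URLs, one of which is chosen at random per call:

from cust_utils import browser_instance

# Default mode parses the response body as JSON; returns None on any non-200 status.
data = browser_instance.get_request('https://httpbin.org/json')

# text_only=True returns the raw body bytes instead, as movies.py does for HTML pages.
page = browser_instance.get_request('https://example.com', text_only=True)

# Hypothetical proxy list; the chosen entry is used for both http and https.
data = browser_instance.get_request('https://httpbin.org/json', proxy=['http://127.0.0.1:8080'])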
src/cust_utils/path_util.py (Normal file, 17 lines)
@@ -0,0 +1,17 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import os
from pathlib import Path


def get_abs_path_name(file_path, file_name):
    return os.path.abspath(file_path + file_name)


def file_exists(file_path, file_name):
    return os.path.isfile(get_abs_path_name(file_path, file_name))


def create_paths(directory):
    Path(os.path.abspath(directory)).mkdir(parents=True, exist_ok=True)
    return os.path.abspath(directory)
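Worth noting: get_abs_path_name concatenates its two arguments directly, so the caller supplies the separator, which is why movies.py passes os.sep + file_name. A short sketch (illustrative, not part of the commit):

import os
from cust_utils import path_util

# mkdir -p equivalent; returns the absolute path it created.
dist = path_util.create_paths('dist' + os.sep + 'some-series')

# The caller provides the separator between directory and file name.
target = path_util.get_abs_path_name(dist, os.sep + 'episode.srt')
print(path_util.file_exists(dist, os.sep + 'episode.srt'))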
src/cust_utils/utils.py (Normal file, 53 lines)
@@ -0,0 +1,53 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
from . import path_util
import subprocess


def create_file(file_path, file_name, data_to_write):
    if not isinstance(data_to_write, str):
        data_to_write = str(data_to_write)
    if not data_to_write or not str(data_to_write).strip():
        print("Empty data provided for {0}".format(file_name))
        return False
    file_location = path_util.get_abs_path_name(file_path, file_name)
    with open(file_location, 'w') as f:
        f.write(data_to_write)
        f.flush()
    return True


def create_file_binary_mode(file_path, file_name, data_to_write):
    if not data_to_write or not str(data_to_write).strip():
        print("Empty data provided for {0}".format(file_name))
        return False
    file_location = path_util.get_abs_path_name(file_path, file_name)
    with open(file_location, 'wb') as f:
        f.write(data_to_write)
        f.flush()
    return True


def read_file_data(file_path, file_name):
    file_location = path_util.get_abs_path_name(file_path, file_name)
    content = None
    with open(file_location, 'r') as f:
        content = f.read().strip()
    return None if content == "" else content


def get_clean_path_name(path_name):
    # Replace characters that are unsafe in file names; '\\/' escapes the backslash
    # properly (the original '\/' relied on an invalid escape sequence).
    for cha in '\\/*?:"<>|,;\'':
        path_name = path_name.replace(cha, ' -')
    return path_name


def get_youtube_dl_command(file_location, video_url):
    command = 'youtube-dl -i "{0}" -o "{1}"'.format(video_url, file_location)
    return command


def call_youtube_dl(youtube_dl_command):
    process = subprocess.Popen(youtube_dl_command, shell=True, stdout=subprocess.PIPE)
    process.wait()
    return process.returncode
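A sketch of how the download helpers compose (illustrative values, not part of the commit):

from cust_utils import utils

# get_clean_path_name strips /\*?:"<>|,;' so the title is safe as a file name.
name = utils.get_clean_path_name('Deutschland 89: Episode 1')

# Hypothetical stream URL; the command quotes both the URL and the output path.
cmd = utils.get_youtube_dl_command(
    file_location='dist/{0}.mp4'.format(name),
    video_url='https://example.com/stream/1080?hash=abc',
)
exit_code = utils.call_youtube_dl(cmd)  # shells out to youtube-dl; 0 means success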
src/movies.py (Normal file, 99 lines)
@@ -0,0 +1,99 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-

import os
import sys
import re
import json
import argparse
import logging
import platform
from cust_utils import *
from __version__ import __version__
from movies_api import *
from bs4 import BeautifulSoup


class Movies:
    def __init__(self, argv, cwd):
        print("Main")
        related_episodes = []
        url = None
        file_name = None
        video_file_name = None
        base_page_content = None
        youtube_dl_command = None
        # Hard-coded test URL; while it is set, the input() prompt below never runs.
        url = 'https://new-movies123.co/tv-series/deutschland-89-season-1/UyTSbjuh/o8r2h6r6/s4dasrz5-watch-online-for-free.html'
        resolution = '1080'
        while not url:
            url = input("Enter Movie URL : ").strip()

        xml_request_data = get_xml_http_request(url)
        if xml_request_data and xml_request_data == "none":
            print("IP Limit Reached")
            sys.exit(1)
        if xml_request_data:
            xml_request_data = dict(xml_request_data[0])
            video_url = xml_request_data.get('src', None)
            max_resolution = xml_request_data.get('max', None)
            if not video_url:
                video_url = xml_request_data.get('file', None)
            if not video_url:
                print("Couldn't get Video Stream URL.")
                sys.exit(1)
            if video_url:
                print("Got Video Stream.")
            if max_resolution:
                print("Will Be Downloading {0} Stream.".format(max_resolution))
                video_url = str(video_url).replace('/360?', '/{0}?'.format(max_resolution))
            else:
                print("Couldn't Find Max Resolution. Going with default {0}.".format(xml_request_data.get('label', '')))

        base_page_content = get_http_request(url, text_only=True)
        if not base_page_content:
            print("Can't Parse Basic Info")
            # Ask the user for a file name instead.
            while not file_name:
                file_name = input("Enter file name : ").strip()
        else:
            soup = BeautifulSoup(base_page_content, 'html.parser')

            video_metadata = soup.find_all('script', type='application/ld+json')
            if not video_metadata:
                print("Can't find metadata")
            if len(video_metadata) > 1:
                # First ld+json block describes the season, the second the current episode.
                metadata_json = str(video_metadata[0]).replace('<script type="application/ld+json">', '').replace('</script>', '')
                season_metadata = dict(json.loads(str(metadata_json)))
                current_episode_metadata_json = str(video_metadata[1]).replace('<script type="application/ld+json">', '').replace('</script>', '')
                current_video_metadata = dict(json.loads(current_episode_metadata_json))
                episodes = season_metadata.get('episode', [])
                for episode in episodes:
                    url = dict(episode).get('url', None)
                    if url:
                        related_episodes.append(str(url))
                current_episode_list = current_video_metadata.get('itemListElement')
                if current_episode_list and len(current_episode_list) > 0:
                    episode_dict = dict(current_episode_list[-1])
                    episode_item = episode_dict.get('item')
                    # current_episode = json.loads(str(episode_item))
                    # current_episode_name = utils.get_clean_path_name(current_episode['name'])
                    current_episode_name = utils.get_clean_path_name(dict(episode_item).get('name'))
                    file_name = '{0}.srt'.format(current_episode_name)
                    video_file_name = '{0}.mp4'.format(current_episode_name)
        # The subtitle list is embedded in the page as a JavaScript literal.
        subs_json = re.search(r'window.subtitles = (.*?)</script>', str(base_page_content))
        if subs_json:
            subtitle_info_list = eval(subs_json.group(1))
            subtitle_info = dict(subtitle_info_list[0]).get('src')
            if subtitle_info:
                subtitle_src = str(subtitle_info).replace('\\', '')
                subtitle_content = browser_instance.get_request(subtitle_src, text_only=True)
                series_name = url.split('/')[4]
                path_created = path_util.create_paths('dist' + os.sep + series_name)
                if path_created:
                    file_written = utils.create_file_binary_mode(path_created, os.sep + file_name, subtitle_content)
                    if file_written:
                        print("Downloaded : {0}".format(file_name))
                        yt_command = utils.get_youtube_dl_command(file_location=path_created + os.sep + video_file_name, video_url=video_url)
                        print("Youtube-dl Command: {0}".format(yt_command))
                        process_code = utils.call_youtube_dl(yt_command)
                        print("Process Done: {0}".format(process_code))
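For context, a sketch of the two application/ld+json shapes the parser above expects (illustrative only; the field names come from the .get() calls in the code, the values are made up):

import json

season_metadata = json.loads('{"episode": [{"url": "https://new-movies123.co/.../episode-1.html"}]}')
current_video_metadata = json.loads('{"itemListElement": [{"item": {"name": "Deutschland 89 - Episode 1"}}]}')

for episode in season_metadata.get('episode', []):
    print(dict(episode).get('url'))  # collected into related_episodes

item = dict(dict(current_video_metadata['itemListElement'][-1])['item'])
print(item['name'])  # becomes '<name>.srt' and '<name>.mp4' above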
src/movies_api.py (Normal file, 17 lines)
@@ -0,0 +1,17 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-

import cust_utils
import json

BASE_URL = 'https://discover.hulu.com/content/v3/entity'


def get_xml_http_request(movie_url):
    response = cust_utils.browser_instance.get_request(url=movie_url, xml_http_request=True)
    return response


def get_http_request(movie_url, text_only=False):
    response = cust_utils.browser_instance.get_request(url=movie_url, text_only=text_only)
    return response
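The two wrappers differ only in the flags they forward to browser_instance.get_request; a short sketch (illustrative, placeholder URLs). BASE_URL points at the Hulu discover API and is unused here, apparently carried over from hulusubs_dl (see the commented imports in __main__.py):

import movies_api

# XHR variant: adds the X-Requested-With header and returns parsed JSON
# carrying the stream's 'src'/'file' and 'max' resolution fields.
stream_info = movies_api.get_xml_http_request('https://new-movies123.co/<episode-page>.html')

# Plain variant: text_only=True returns raw page bytes for BeautifulSoup.
page = movies_api.get_http_request('https://new-movies123.co/<episode-page>.html', text_only=True)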