# Metadata search and fuzzy-matching helpers for MAL / Anilist results.
from anime_downloader.sites import helpers
|
|
import logging
|
|
from anime_downloader.sites.anime import Anime, AnimeEpisode, SearchResult
|
|
from anime_downloader.sites import get_anime_class
|
|
from anime_downloader.config import Config
|
|
from anime_downloader.util import primitive_search
|
|
|
|
import warnings
|
|
with warnings.catch_warnings():
|
|
warnings.simplefilter('ignore')
|
|
from fuzzywuzzy import fuzz
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
|
|
class AnimeInfo:
    """
    Metadata for a single show scraped from MAL or Anilist.

    Attributes
    ----------
    url: string
        URL for the info page
    title: string
        English name of the show.
    jp_title: string
        Japanese name of the show.
    metadata: dict
        Data not critical for core functions
    episodes: int
        Max amount of episodes
    """

    def __init__(self, url, episodes, title=None, jp_title=None, metadata=None):
        self.url = url
        self.episodes = episodes
        self.title = title
        self.jp_title = jp_title
        # A literal `{}` default would be one shared dict mutated by every
        # instance (mutable-default-argument pitfall); use a None sentinel.
        self.metadata = metadata if metadata is not None else {}
|
|
|
|
|
|
class MatchObject:
    """
    A scored pairing between one metadata result and one provider result.

    Attributes
    ----------
    AnimeInfo: object
        Metadata object from the MAL search.
    SearchResult: object
        Metadata object from the provider search
    ratio: int
        A number between 0-100 describing the similarities between
        SearchResult and AnimeInfo. Higher number = more similar.
    """

    def __init__(self, AnimeInfo, SearchResult, ratio=100):
        # Store both halves of the pairing together with their score.
        self.AnimeInfo, self.SearchResult, self.ratio = AnimeInfo, SearchResult, ratio
|
|
|
|
# Not used
|
|
|
|
|
|
def search_mal(query):
    """
    Search MyAnimeList for `query` and let the user pick a season.

    Scrapes the MAL search page for candidate shows, then scrapes each
    candidate's info page for titles and episode count, and finally
    prompts the user to choose one via `primitive_search`.

    Parameters
    ----------
    query: str
        Name of the show to search for.

    Returns
    -------
    AnimeInfo
        The metadata object selected by the user.
    """

    def search(query):
        # Each matching anchor on the search page carries the show URL
        # and its title inside a <strong> tag.
        soup = helpers.soupify(helpers.get('https://myanimelist.net/anime.php', params={'q': query}))
        search_results = soup.select("a.hoverinfo_trigger.fw-b.fl-l")
        return [SearchResult(
            url=i.get('href'),
            title=i.select('strong')[0].text
        ) for i in search_results]

    def scrape_metadata(url):
        # Scrape one MAL info page. info_dict ends up like:
        # {
        #     'url': 'https://myanimelist.net/anime/37779/Yakusoku_no_Neverland',
        #     'title': 'The Promised Neverland',
        #     'jp_title': '約束のネバーランド'
        # }
        soup = helpers.soupify(helpers.get(url))
        info_dict = {
            'url': url
        }

        # Maps sidebar labels to keys in info_dict.
        # NOTE: MAL capitalizes these labels, so the keys must match
        # case exactly — 'synonyms:' (lowercase) never matched anything.
        name_dict = {
            'Japanese:': 'jp_title',
            'English:': 'title',
            'Synonyms:': 'synonyms',
            'Episodes:': 'episodes'
        }
        info = soup.select('span.dark_text')
        extra_info = [i.parent.text.strip() for i in info]
        for i in extra_info:
            text = i.replace('\n', '').strip()
            for j in name_dict:
                if text.startswith(j):
                    info_dict[name_dict[j]] = text[len(j):].strip()

        # Backup name if no English name is registered in the sidebar.
        if not info_dict.get('title'):
            name = soup.select('span[itemprop=name]')
            info_dict['title'] = name[0].text if name else None

        # Always set episodes; MAL reports 'Unknown' for airing shows.
        if not info_dict.get('episodes') or info_dict.get('episodes') == 'Unknown':
            info_dict['episodes'] = 0

        # TODO error message when this stuff is not correctly scraped
        # Can happen if MAL is down or something similar
        return AnimeInfo(url=info_dict['url'], title=info_dict.get('title'),
                         jp_title=info_dict.get('jp_title'), episodes=int(info_dict['episodes']))

    search_results = search(query)
    season_info = []
    # Max 10 results; drop entries with no known episode count.
    for i in range(min(len(search_results), 10)):
        anime_info = scrape_metadata(search_results[i].url)
        if anime_info.episodes:
            season_info.append(anime_info)

    # Prompts the user for selection
    return primitive_search(season_info)
|
|
|
|
# Choice allows the user to preselect, used to download from a list overnight.
|
|
# None prompts the user.
|
|
|
|
|
|
def search_anilist(query, choice=None):
    """
    Search Anilist's GraphQL API for `query` and return one AnimeInfo.

    Parameters
    ----------
    query: str
        Name of the show to search for.
    choice: int, optional
        1-based preselected result index, used to download from a list
        unattended. Out-of-range values wrap around via modulo. When
        None, the user is prompted interactively.

    Returns
    -------
    AnimeInfo
        The selected result, with the raw API entry kept in `metadata`.

    Raises
    ------
    NameError
        If the API returns no results for the query.
    """

    def search(query):
        # GraphQL query: first page, up to 10 anime matching the search.
        ani_query = """
        query ($id: Int, $page: Int, $search: String, $type: MediaType) {
            Page (page: $page, perPage: 10) {
                media (id: $id, search: $search, type: $type) {
                    id
                    idMal
                    description(asHtml: false)
                    seasonYear
                    title {
                        english
                        romaji
                        native
                    }
                    coverImage {
                        extraLarge
                    }
                    bannerImage
                    averageScore
                    status
                    episodes
                }
            }
        }
        """
        url = 'https://graphql.anilist.co'

        # TODO check in case there's no results
        # It seems to error on no results (anime -ll DEBUG dl "nev")
        results = helpers.post(url, json={'query': ani_query, 'variables': {'search': query, 'page': 1, 'type': 'ANIME'}}).json()['data']['Page']['media']
        if not results:
            logger.error('No results found in anilist')
            raise NameError

        # Entries without an episode count (e.g. unaired shows) are
        # unusable downstream, so they are filtered out here.
        search_results = [AnimeInfo(url='https://anilist.co/anime/' + str(i['id']), title=i['title']['romaji'],
                                    jp_title=i['title']['native'], episodes=int(i['episodes']), metadata=i)
                          for i in results if i['episodes'] is not None]
        return search_results

    search_results = search(query)

    # This can also be fuzzied, but too many options.
    if choice is not None:
        # Wraps a too-low or too-high choice back into a valid index.
        fixed_choice = (choice - 1) % len(search_results)
        return search_results[fixed_choice]
    else:
        # Prompts the user for selection
        return primitive_search(search_results)
|
|
|
|
|
|
def fuzzy_match_metadata(seasons_info, search_results):
    """
    Pick the provider result most similar to the MAL/Anilist metadata.

    Compares every AnimeInfo in `seasons_info` against every
    SearchResult in `search_results` with fuzzy string matching on both
    English and Japanese titles.

    Parameters
    ----------
    seasons_info: list of AnimeInfo
        Metadata candidates from MAL/Anilist.
    search_results: list of SearchResult
        Candidates from the provider search.

    Returns
    -------
    MatchObject
        The pairing with the highest similarity ratio (0-100).
    """
    results = []
    for info in seasons_info:
        for provider in search_results:
            # A provider may expose a pre-cleaned title via
            # meta_info['title_cleaned'] to make fuzzy matching better.
            title_provider = provider.title.strip() if not provider.meta_info.get('title_cleaned') else provider.meta_info.get('title_cleaned').strip()
            # info.title can be None (no English title was scraped);
            # fall back to '' so the concatenation below cannot raise.
            title_info = info.title or ''

            # Essentially adds the chosen key to the query if the version is in use
            # Dirty solution, but should work pretty well
            config = Config['siteconfig'].get(get_anime_class(provider.url).sitename, {})
            version = config.get('version')
            version_use = version == 'dubbed'
            # Adds something like (Sub) or (Dub) to the title
            key_used = provider.meta_info.get('version_key_dubbed', '') if version_use else provider.meta_info.get('version_key_subbed', '')
            title_info = (title_info + ' ' + key_used).strip()

            # TODO add synonyms
            # 0 if there's no japanese name
            jap_ratio = fuzz.ratio(info.jp_title, provider.meta_info['jp_title']) if provider.meta_info.get('jp_title') else 0
            # Outputs the max ratio for japanese or english name (0-100)
            ratio = max(fuzz.ratio(title_info, title_provider), jap_ratio)
            logger.debug('Ratio: {}, Info title: {}, Provider Title: {}, Key used: {}'.format(ratio, title_info, title_provider, key_used))
            results.append(MatchObject(info, provider, ratio))

    # Returns the result with highest ratio
    return max(results, key=lambda item: item.ratio)
|