Add chardet_confidence_limit option for 'auto' encoding setting.
This commit is contained in:
parent
7f4bc5c36e
commit
d2feac0c66
@ -123,6 +123,10 @@ include_tocpage: true
|
||||
## it has +90% confidence. 'auto' is not reliable.
|
||||
#website_encodings: utf8, Windows-1252, iso-8859-1
|
||||
|
||||
## When using 'auto' in website_encodings, you can tweak the
|
||||
## confidence required to use the chardet-detected encoding.
|
||||
#chardet_confidence_limit: 0.9
|
||||
|
||||
## entries to make epub subjects and calibre tags
|
||||
## lastupdate creates two tags: "Last Update Year/Month: %Y/%m" and "Last Update: %Y/%m/%d"
|
||||
include_subject_tags: extratags, genre, category, characters, ships, status
|
||||
|
@ -354,6 +354,7 @@ def get_valid_keywords():
|
||||
'include_subject_tags',
|
||||
'include_titlepage',
|
||||
'include_tocpage',
|
||||
'chardet_confidence_limit',
|
||||
'is_adult',
|
||||
'join_string_authorHTML',
|
||||
'keep_style_attr',
|
||||
@ -912,9 +913,11 @@ class Configuration(ConfigParser.SafeConfigParser):
|
||||
continue
|
||||
detected = chardet.detect(data)
|
||||
#print detected
|
||||
if detected['confidence'] > 0.9:
|
||||
if detected['confidence'] > float(self.getConfig("chardet_confidence_limit",0.9)):
|
||||
logger.debug("using chardet detected encoding:%s(%s)"%(detected['encoding'],detected['confidence']))
|
||||
code=detected['encoding']
|
||||
else:
|
||||
logger.debug("chardet confidence too low:%s(%s)"%(detected['encoding'],detected['confidence']))
|
||||
continue
|
||||
return data.decode(code)
|
||||
except:
|
||||
|
@ -123,6 +123,10 @@ include_tocpage: true
|
||||
## it has +90% confidence. 'auto' is not reliable.
|
||||
#website_encodings: utf8, Windows-1252, iso-8859-1
|
||||
|
||||
## When using 'auto' in website_encodings, you can tweak the
|
||||
## confidence required to use the chardet-detected encoding.
|
||||
#chardet_confidence_limit: 0.9
|
||||
|
||||
## python string Template, string with ${title}, ${author} etc, same as titlepage_entries
|
||||
## Can include directories.
|
||||
#output_filename: books/${title}-${siteabbrev}_${storyId}${formatext}
|
||||
|
Loading…
x
Reference in New Issue
Block a user