From 6ca879938de45ad1455f047fd6b52500e7610e7d Mon Sep 17 00:00:00 2001
From: Thomas Goyne
Date: Sat, 28 Sep 2019 13:56:56 -0700
Subject: [PATCH] Always feed the entire file into uchardet when detection is needed

uchardet will report that a file is "ascii" if the first page has no
bytes >127, so we need to actually look at the entire file in case the
first higher byte is later in the file.
---
 libaegisub/common/charset.cpp | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/libaegisub/common/charset.cpp b/libaegisub/common/charset.cpp
index 774e27da1..81008538b 100644
--- a/libaegisub/common/charset.cpp
+++ b/libaegisub/common/charset.cpp
@@ -59,9 +59,6 @@ std::string Detect(agi::fs::path const& file) {
 		auto read = std::min(4096, fp.size() - offset);
 		auto buf = fp.read(offset, read);
 		uchardet_handle_data(ud, buf, read);
-		uchardet_data_end(ud);
-		if (*uchardet_get_charset(ud))
-			return uchardet_get_charset(ud);
 
 		offset += read;
 
@@ -74,6 +71,7 @@ std::string Detect(agi::fs::path const& file) {
 		if (binaryish > offset / 8)
 			return "binary";
 	}
+	uchardet_data_end(ud);
 	return uchardet_get_charset(ud);
 #else
 	auto read = std::min(4096, fp.size());