Validate the encoding of files marked UTF-8

tools/check-typo validates the encoding of any file with ocaml-typo=utf8
and will not proceed if the test fails.
master
David Allsopp 2018-06-07 13:35:47 +01:00
parent 7a7c156d3f
commit 6885aeeedb
1 changed files with 16 additions and 3 deletions

View File

@ -81,9 +81,9 @@
# You can ignore a rule by giving the option -<rule> on the command
# line (before any file names).
# Files which include the utf8 rule will have line-length computations take
# UTF-8 sequences into account. As a special case, UTF-8 sequences are always
# allowed in the copyright headers.
# Files which include the utf8 rule will have their encoding validated using
# grep, and their line-length computations will take UTF-8 sequences into
# account. As a special case, UTF-8 sequences are always allowed in the
# copyright headers.
# Pin the locale to C before doing anything else, so that the tools invoked
# by this script are not affected by the caller's i18n settings.
LC_ALL=C
export LC_ALL
@ -163,6 +163,19 @@ IGNORE_DIRS="
ocamldoc/*|*/ocamldoc/*) rules="long-line,$rules";;
esac
case ",$svnrules," in
  *,utf8,*)
    # The file is declared as UTF-8, so refuse to continue if it is not.
    # The check is a single grep run under a UTF-8 locale (set for this one
    # command only):
    #   -a  treat the file as text, whatever it looks like
    #   -x  a match must span the entire line
    #   -v  select the lines that do NOT match
    #   -q  print nothing; only the exit status is used
    # With the pattern '.*', the selected lines are those containing
    # malformed UTF-8. A useful file to try this on is
    # https://www.cl.cam.ac.uk/~mgk25/ucs/examples/UTF-8-test.txt
    # NOTE(review): this assumes the en_US.UTF8 locale is available on the
    # machine; presumably grep would otherwise fall back to the C locale and
    # accept malformed files - TODO confirm.
    if LC_ALL=en_US.UTF8 grep -q -a -x -v '.*' "$f"; then
      echo "File $f is not correctly encoded in UTF-8"
      exit 2
    fi
    ;;
esac
(cat "$f" | tr -d '\r'; echo) \
| awk -v rules="$rules" -v svnrules="$svnrules" -v file="$f" \
'