Validate the encoding of files marked UTF-8
tools/check-typo validates the encoding of any file with ocaml-typo=utf8 and will not proceed if the test fails.
master
parent
7a7c156d3f
commit
6885aeeedb
|
@ -81,9 +81,9 @@
|
|||
# You can ignore a rule by giving the option -<rule> on the command
|
||||
# line (before any file names).
|
||||
|
||||
# Files which include the utf8 rule will have line-length computations take
|
||||
# UTF-8 sequences into account. As a special case, UTF-8 sequences are always
|
||||
# allowed in the copyright headers.
|
||||
# Files which include the utf8 rule will be validated using grep and line-length
|
||||
# computations will take UTF-8 sequences into account. As a special case, UTF-8
|
||||
# sequences are always allowed in the copyright headers.
|
||||
|
||||
# First prevent i18n from messing up everything.
|
||||
export LC_ALL=C
|
||||
|
@ -163,6 +163,19 @@ IGNORE_DIRS="
|
|||
ocamldoc/*|*/ocamldoc/*) rules="long-line,$rules";;
|
||||
esac
|
||||
|
||||
# Validate the encoding of "$f" when its rule list contains the utf8 rule.
# Wrapping $svnrules in commas lets the pattern *,utf8,* match utf8 only as
# a whole list item (presumably $svnrules is a comma-separated rule list).
case ,$svnrules, in
|
||||
*,utf8,*)
|
||||
# grep -a is used to force the file to be considered as text and -x
|
||||
# requires the entire line to match. This specifically detects the
|
||||
# presence of lines containing malformed UTF-8. It may be tested using
|
||||
# https://www.cl.cam.ac.uk/~mgk25/ucs/examples/UTF-8-test.txt
# -v inverts the selection, so only lines that '.*' cannot match in full
# are selected; -q suppresses output and makes grep succeed as soon as one
# such line is found. LC_ALL is overridden for this single command because
# the script exports LC_ALL=C earlier.
# NOTE(review): if the en_US.UTF8 locale is not installed, grep presumably
# falls back to the C locale and this check may silently pass - confirm.
|
||||
if LC_ALL=en_US.UTF8 grep -qaxv '.*' "$f" ; then
|
||||
echo File "$f" is not correctly encoded in UTF-8
|
||||
# Abort the whole script with status 2; no further checks are run.
exit 2
|
||||
fi
|
||||
;;
|
||||
esac
|
||||
|
||||
(cat "$f" | tr -d '\r'; echo) \
|
||||
| awk -v rules="$rules" -v svnrules="$svnrules" -v file="$f" \
|
||||
'
|
||||
|
|
Loading…
Reference in New Issue