Add utf8 rule to tools/check-typo
The utf8 rule allows UTF-8 sequences anywhere in a file, and line-length calculations take this into account.
master
parent
e1270c41be
commit
7a7c156d3f
|
@ -44,7 +44,7 @@ README* ocaml-typo=missing-header
|
|||
|
||||
/.mailmap ocaml-typo=long-line,missing-header,non-ascii
|
||||
/.merlin ocaml-typo=missing-header
|
||||
/Changes ocaml-typo=non-ascii,missing-header
|
||||
/Changes ocaml-typo=utf8,missing-header
|
||||
/INSTALL ocaml-typo=missing-header
|
||||
/LICENSE ocaml-typo=long-line,very-long-line,missing-header
|
||||
# tools/ci/appveyor/appveyor_build.cmd only has missing-header because
|
||||
|
@ -87,8 +87,8 @@ testsuite/tests/** ocaml-typo=missing-header
|
|||
testsuite/tests/lib-bigarray-2/bigarrf.f ocaml-typo=missing-header,tab
|
||||
testsuite/tests/lib-unix/win-stat/fakeclock.c ocaml-typo=
|
||||
testsuite/tests/misc-unsafe/almabench.ml ocaml-typo=missing-header,long-line
|
||||
testsuite/tests/tool-toplevel/strings.ml ocaml-typo=non-ascii,missing-header
|
||||
testsuite/tests/win-unicode/*.ml ocaml-typo=non-ascii,missing-header
|
||||
testsuite/tests/tool-toplevel/strings.ml ocaml-typo=utf8,missing-header
|
||||
testsuite/tests/win-unicode/*.ml ocaml-typo=utf8,missing-header
|
||||
testsuite/typing ocaml-typo=missing-header
|
||||
|
||||
tools/magic ocaml-typo=missing-header
|
||||
|
|
|
@ -81,6 +81,10 @@
|
|||
# You can ignore a rule by giving the option -<rule> on the command
|
||||
# line (before any file names).
|
||||
|
||||
# Files which include the utf8 rule will have line-length computations take
|
||||
# UTF-8 sequences into account. As a special case, UTF-8 sequences are always
|
||||
# allowed in the copyright headers.
|
||||
|
||||
# First prevent i18n from messing up everything.
|
||||
export LC_ALL=C
|
||||
|
||||
|
@ -191,16 +195,31 @@ IGNORE_DIRS="
|
|||
return c > limit;
|
||||
}
|
||||
|
||||
# utf8_decode(str): return a version of str intended for column counting.
#   str - the text (callers pass the current line) to be measured.
# If is_err("utf8") is true, str is returned unchanged (presumably this means
# the utf8 rule is not enabled for the current file -- confirm against is_err).
# Otherwise each lead byte in \300-\367 together with the continuation bytes
# (\200-\277) that follow it is replaced by a single "?", so a multi-byte
# sequence counts as one column; counts["utf8"] is incremented once when at
# least one replacement was made, and the rewritten string is returned.
# NOTE: t is not declared in the parameter list, so it is an awk global; the
# rules that call this function also assign its result to t.
function utf8_decode(str) {
|
||||
if (is_err("utf8")) {
|
||||
return str;
|
||||
} else {
|
||||
# This script assumes that the UTF-8 has been externally validated
|
||||
t = str;
|
||||
# Collapse every lead byte plus its continuation bytes into one placeholder.
gsub(/[\300-\367][\200-\277]+/, "?", t);
|
||||
if (t != str) {
|
||||
++ counts["utf8"];
|
||||
}
|
||||
return t;
|
||||
}
|
||||
}
|
||||
|
||||
BEGIN { state = "(first line)"; }
|
||||
|
||||
match($0, /\t/) {
|
||||
err("tab", "TAB character(s)");
|
||||
if (more_columns($0, 80)){
|
||||
t = utf8_decode($0);
|
||||
if (more_columns(t, 80)){
|
||||
RSTART=81;
|
||||
RLENGTH = 0;
|
||||
err("long-line", "line is over 80 columns");
|
||||
}
|
||||
if (more_columns($0, 132)){
|
||||
if (more_columns(t, 132)){
|
||||
RSTART=133;
|
||||
RLENGTH = 0;
|
||||
err("very-long-line", "line is over 132 columns");
|
||||
|
@ -209,10 +228,14 @@ IGNORE_DIRS="
|
|||
|
||||
match($0, /[\200-\377]/) \
|
||||
&& state != "authors" && state != "copyright" {
|
||||
err("non-ascii", "non-ASCII character(s)");
|
||||
if (header_utf8 && !is_err("non-ascii")) {
|
||||
err("non-ascii-utf8", \
|
||||
"non-ASCII character(s) AND UTF-8 encountered");
|
||||
if (is_err("utf8")) {
|
||||
err("non-ascii", "non-ASCII character(s)");
|
||||
if (header_utf8 && !is_err("non-ascii")) {
|
||||
err("non-ascii-utf8", \
|
||||
"non-ASCII character(s) AND UTF-8 encountered");
|
||||
}
|
||||
} else {
|
||||
++ counts["utf8"];
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -229,7 +252,7 @@ IGNORE_DIRS="
|
|||
}
|
||||
|
||||
$0 !~ /\t/ && length($0) > 80 {
|
||||
t = $0;
|
||||
t = utf8_decode($0);
|
||||
sub(/https?:[A-Za-z0-9._~:/?#\[\]@!$&\047()*+,;=%-]{73,}$/, "", t);
|
||||
if (length(t) > 80) {
|
||||
RSTART = 81;
|
||||
|
@ -241,7 +264,10 @@ IGNORE_DIRS="
|
|||
$0 !~ /\t/ && length($0) > 132 {
|
||||
RSTART = 133;
|
||||
RLENGTH = 0;
|
||||
err("very-long-line", "line is over 132 columns");
|
||||
t = utf8_decode($0);
|
||||
if (length(t) > 132) {
|
||||
err("very-long-line", "line is over 132 columns");
|
||||
}
|
||||
}
|
||||
|
||||
# Record that the header contained UTF-8 sequences
|
||||
|
|
Loading…
Reference in New Issue