diff --git a/Changes b/Changes index c9f718276..7b82b1c92 100644 --- a/Changes +++ b/Changes @@ -140,6 +140,9 @@ Working version (Whitequark and Jacques-Henri Jourdan, review by Gabriel Scherer and Xavier Clerc) +- #8621: Make ocamlyacc a Windows Unicode application + (David Allsopp, review by Nicolás Ojeda Bär) + * #8834, `ocaml`: adhere to the XDG base directory specification to locate an `.ocamlinit` file. Reads an `$XDG_CONFIG_HOME/ocaml/init.ml` file before trying to lookup `~/.ocamlinit`. On Windows the behaviour diff --git a/runtime/caml/misc.h b/runtime/caml/misc.h index 06844b05c..3df39aa5d 100644 --- a/runtime/caml/misc.h +++ b/runtime/caml/misc.h @@ -257,6 +257,9 @@ extern double caml_log1p(double); #define strcmp_os wcscmp #define strlen_os wcslen #define sscanf_os swscanf +#define strcpy_os wcscpy +#define mktemp_os _wmktemp +#define fopen_os _wfopen #define caml_stat_strdup_os caml_stat_wcsdup #define caml_stat_strconcat_os caml_stat_wcsconcat @@ -289,6 +292,9 @@ extern double caml_log1p(double); #define strcmp_os strcmp #define strlen_os strlen #define sscanf_os sscanf +#define strcpy_os strcpy +#define mktemp_os mktemp +#define fopen_os fopen #define caml_stat_strdup_os caml_stat_strdup #define caml_stat_strconcat_os caml_stat_strconcat diff --git a/yacc/Makefile b/yacc/Makefile index 7d6c0e194..bbd8dcc44 100644 --- a/yacc/Makefile +++ b/yacc/Makefile @@ -22,9 +22,15 @@ include $(ROOTDIR)/Makefile.common OC_CPPFLAGS += -I$(ROOTDIR)/runtime +ifeq "$(UNIX_OR_WIN32)" "win32" +WSTR_OBJ = wstr +else +WSTR_OBJ = +endif + ocamlyacc_SOURCES := $(addsuffix .c,\ - closure error lalr lr0 main mkpar output reader skeleton symtab verbose \ - warshall) + $(WSTR_OBJ) closure error lalr lr0 main mkpar output reader skeleton \ + symtab verbose warshall) ocamlyacc_OBJECTS := $(ocamlyacc_SOURCES:.c=.$(O)) @@ -32,12 +38,8 @@ generated_files := ocamlyacc$(EXE) $(ocamlyacc_OBJECTS) version.h all: ocamlyacc$(EXE) -ifeq ($(TOOLCHAIN),cc) -MKEXE_ANSI=$(MKEXE) -endif - 
ocamlyacc$(EXE): $(ocamlyacc_OBJECTS) - $(MKEXE_ANSI) -o $@ $^ $(EXTRALIBS) + $(MKEXE) -o $@ $^ $(EXTRALIBS) version.h : $(ROOTDIR)/VERSION echo "#define OCAML_VERSION \"`sed -e 1q $< | tr -d '\r'`\"" > $@ diff --git a/yacc/defs.h b/yacc/defs.h index 9bd973c48..91aadc3e6 100644 --- a/yacc/defs.h +++ b/yacc/defs.h @@ -25,7 +25,13 @@ #include #include #include -#include "caml/misc.h" +#include +#define CAML_INTERNALS +#include "caml/config.h" +#include "caml/mlvalues.h" +#include "caml/osdeps.h" + +#define caml_stat_strdup strdup /* machine-dependent definitions */ /* the following definitions are for the Tahoe */ @@ -69,9 +75,9 @@ /* defines for constructing filenames */ -#define OUTPUT_SUFFIX ".ml" -#define VERBOSE_SUFFIX ".output" -#define INTERFACE_SUFFIX ".mli" +#define OUTPUT_SUFFIX T(".ml") +#define VERBOSE_SUFFIX T(".output") +#define INTERFACE_SUFFIX T(".mli") /* keyword codes */ @@ -212,21 +218,27 @@ extern char sflag; extern char eflag; extern char big_endian; +/* myname should be UTF-8 encoded */ extern char *myname; extern char *cptr; extern char *line; extern int lineno; +/* virtual_input_file_name should be UTF-8 encoded */ extern char *virtual_input_file_name; extern int outline; -extern char *action_file_name; -extern char *entry_file_name; -extern char *code_file_name; -extern char *input_file_name; -extern char *output_file_name; -extern char *text_file_name; -extern char *verbose_file_name; -extern char *interface_file_name; +extern char_os *action_file_name; +extern char_os *entry_file_name; +extern char_os *code_file_name; +extern char_os *input_file_name; +extern char_os *output_file_name; +extern char_os *text_file_name; +extern char_os *verbose_file_name; +extern char_os *interface_file_name; + +/* UTF-8 versions of code_file_name and input_file_name */ +extern char *code_file_name_disp; +extern char *input_file_name_disp; extern FILE *action_file; extern FILE *entry_file; @@ -318,7 +330,7 @@ extern void lr0 (void); extern void make_parser 
(void); extern void no_grammar (void) Noreturn; extern void no_space (void) Noreturn; -extern void open_error (char *filename) Noreturn; +extern void open_error (char_os *filename) Noreturn; extern void output (void); extern void prec_redeclared (void); extern void polymorphic_entry_point(char *s) Noreturn; diff --git a/yacc/error.c b/yacc/error.c index f116f2c87..b2750c971 100644 --- a/yacc/error.c +++ b/yacc/error.c @@ -19,6 +19,9 @@ #include "defs.h" +/* String displayed if we can't malloc a buffer for the UTF-8 conversion */ +static char *unknown = ""; + void fatal(char *msg) { fprintf(stderr, "%s: f - %s\n", myname, msg); @@ -33,9 +36,10 @@ void no_space(void) } -void open_error(char *filename) +void open_error(char_os *filename) { - fprintf(stderr, "%s: f - cannot open \"%s\"\n", myname, filename); + char *u8 = caml_stat_strdup_of_os(filename); + fprintf(stderr, "%s: f - cannot open \"%s\"\n", myname, (u8 ? u8 : unknown)); done(2); } diff --git a/yacc/main.c b/yacc/main.c index 4a04a21f2..a60f46762 100644 --- a/yacc/main.c +++ b/yacc/main.c @@ -33,12 +33,14 @@ char eflag; char sflag; char big_endian; -char *file_prefix = 0; +char_os *file_prefix = 0; char *myname = "yacc"; -char temp_form[] = "yacc.XXXXXXX"; +char_os temp_form[] = T("yacc.XXXXXXX"); #ifdef _WIN32 -char dirsep = '\\'; +wchar_t dirsep = L'\\'; +/* mingw provides an implementation of mkstemp, but it's ANSI only */ +#undef HAS_MKSTEMP #else char dirsep = '/'; #endif @@ -47,14 +49,16 @@ int lineno; char *virtual_input_file_name = NULL; int outline; -char *action_file_name; -char *entry_file_name; -char *code_file_name; -char *interface_file_name; -char *input_file_name = ""; -char *output_file_name; -char *text_file_name; -char *verbose_file_name; +char_os *action_file_name; +char_os *entry_file_name; +char_os *code_file_name; +char *code_file_name_disp; +char_os *interface_file_name; +char_os *input_file_name = T(""); +char *input_file_name_disp; +char_os *output_file_name; +char_os 
*text_file_name; +char_os *verbose_file_name; #ifdef HAS_MKSTEMP int action_fd = -1, entry_fd = -1, text_fd = -1; @@ -105,15 +109,15 @@ void done(int k) if (text_fd != -1) unlink(text_file_name); #else - if (action_file) { fclose(action_file); unlink(action_file_name); } - if (entry_file) { fclose(entry_file); unlink(entry_file_name); } - if (text_file) { fclose(text_file); unlink(text_file_name); } + if (action_file) { fclose(action_file); unlink_os(action_file_name); } + if (entry_file) { fclose(entry_file); unlink_os(entry_file_name); } + if (text_file) { fclose(text_file); unlink_os(text_file_name); } #endif if (output_file && k > 0) { - fclose(output_file); unlink(output_file_name); + fclose(output_file); unlink_os(output_file_name); } if (interface_file && k > 0) { - fclose(interface_file); unlink(interface_file_name); + fclose(interface_file); unlink_os(interface_file_name); } exit(k); } @@ -149,12 +153,13 @@ void usage(void) exit(1); } -void getargs(int argc, char **argv) +void getargs(int argc, char_os **argv) { register int i; - register char *s; + register char_os *s; - if (argc > 0) myname = argv[0]; + if (argc > 0) myname = caml_stat_strdup_of_os(argv[0]); + if (!myname) no_space(); for (i = 1; i < argc; ++i) { s = argv[i]; @@ -163,12 +168,12 @@ void getargs(int argc, char **argv) { case '\0': input_file = stdin; - file_prefix = "stdin"; + file_prefix = T("stdin"); if (i + 1 < argc) usage(); return; case '-': - if (!strcmp (argv[i], "--strict")){ + if (!strcmp_os (argv[i], T("--strict"))){ eflag = 1; goto end_of_option; } @@ -176,11 +181,11 @@ void getargs(int argc, char **argv) goto no_more_options; case 'v': - if (!strcmp (argv[i], "-version")){ + if (!strcmp_os (argv[i], T("-version"))){ printf ("The OCaml parser generator, version " OCAML_VERSION "\n"); exit (0); - }else if (!strcmp (argv[i], "-vnum")){ + }else if (!strcmp_os (argv[i], T("-vnum"))){ printf (OCAML_VERSION "\n"); exit (0); }else{ @@ -230,12 +235,14 @@ end_of_option:; 
no_more_options:; if (i + 1 != argc) usage(); input_file_name = argv[i]; + input_file_name_disp = caml_stat_strdup_of_os(input_file_name); + if (!input_file_name_disp) no_space(); if (file_prefix == 0) { int len; - len = strlen(argv[i]); - file_prefix = malloc(len + 1); + len = strlen_os(argv[i]); + file_prefix = MALLOC((len + 1) * sizeof(char_os)); if (file_prefix == 0) no_space(); - strcpy(file_prefix, argv[i]); + strcpy_os(file_prefix, argv[i]); while (len > 0) { len--; if (file_prefix[len] == '.') { @@ -265,30 +272,30 @@ allocate(unsigned int n) void create_file_names(void) { int i, len; - char *tmpdir; + char_os *tmpdir; #ifdef _WIN32 - tmpdir = getenv("TEMP"); - if (tmpdir == 0) tmpdir = "."; + tmpdir = _wgetenv(L"TEMP"); + if (tmpdir == 0) tmpdir = L"."; #else tmpdir = getenv("TMPDIR"); if (tmpdir == 0) tmpdir = "/tmp"; #endif - len = strlen(tmpdir); + len = strlen_os(tmpdir); i = len + sizeof(temp_form); if (len && tmpdir[len-1] != dirsep) ++i; - action_file_name = MALLOC(i); + action_file_name = MALLOC(i * sizeof(char_os)); if (action_file_name == 0) no_space(); - entry_file_name = MALLOC(i); + entry_file_name = MALLOC(i * sizeof(char_os)); if (entry_file_name == 0) no_space(); - text_file_name = MALLOC(i); + text_file_name = MALLOC(i * sizeof(char_os)); if (text_file_name == 0) no_space(); - strcpy(action_file_name, tmpdir); - strcpy(entry_file_name, tmpdir); - strcpy(text_file_name, tmpdir); + strcpy_os(action_file_name, tmpdir); + strcpy_os(entry_file_name, tmpdir); + strcpy_os(text_file_name, tmpdir); if (len && tmpdir[len - 1] != dirsep) { @@ -298,13 +305,13 @@ void create_file_names(void) ++len; } - strcpy(action_file_name + len, temp_form); - strcpy(entry_file_name + len, temp_form); - strcpy(text_file_name + len, temp_form); + strcpy_os(action_file_name + len, temp_form); + strcpy_os(entry_file_name + len, temp_form); + strcpy_os(text_file_name + len, temp_form); - action_file_name[len + 5] = 'a'; - entry_file_name[len + 5] = 'e'; - 
text_file_name[len + 5] = 't'; + action_file_name[len + 5] = L'a'; + entry_file_name[len + 5] = L'e'; + text_file_name[len + 5] = L't'; #ifdef HAS_MKSTEMP action_fd = mkstemp(action_file_name); @@ -317,35 +324,37 @@ void create_file_names(void) if (text_fd == -1) open_error(text_file_name); #else - mktemp(action_file_name); - mktemp(entry_file_name); - mktemp(text_file_name); + mktemp_os(action_file_name); + mktemp_os(entry_file_name); + mktemp_os(text_file_name); #endif - len = strlen(file_prefix); + len = strlen_os(file_prefix); - output_file_name = MALLOC(len + 7); + output_file_name = MALLOC((len + 7) * sizeof(char_os)); if (output_file_name == 0) no_space(); - strcpy(output_file_name, file_prefix); - strcpy(output_file_name + len, OUTPUT_SUFFIX); + strcpy_os(output_file_name, file_prefix); + strcpy_os(output_file_name + len, OUTPUT_SUFFIX); code_file_name = output_file_name; + code_file_name_disp = caml_stat_strdup_of_os(code_file_name); + if (!code_file_name_disp) no_space(); if (vflag) { - verbose_file_name = MALLOC(len + 8); + verbose_file_name = MALLOC((len + 8) * sizeof(char_os)); if (verbose_file_name == 0) no_space(); - strcpy(verbose_file_name, file_prefix); - strcpy(verbose_file_name + len, VERBOSE_SUFFIX); + strcpy_os(verbose_file_name, file_prefix); + strcpy_os(verbose_file_name + len, VERBOSE_SUFFIX); } - interface_file_name = MALLOC(len + 8); + interface_file_name = MALLOC((len + 8) * sizeof(char_os)); if (interface_file_name == 0) no_space(); - strcpy(interface_file_name, file_prefix); - strcpy(interface_file_name + len, INTERFACE_SUFFIX); + strcpy_os(interface_file_name, file_prefix); + strcpy_os(interface_file_name + len, INTERFACE_SUFFIX); } @@ -356,7 +365,7 @@ void open_files(void) if (input_file == 0) { - input_file = fopen(input_file_name, "r"); + input_file = fopen_os(input_file_name, T("r")); if (input_file == 0) open_error(input_file_name); } @@ -364,7 +373,7 @@ void open_files(void) #ifdef HAS_MKSTEMP action_file = fdopen(action_fd, 
"w"); #else - action_file = fopen(action_file_name, "w"); + action_file = fopen_os(action_file_name, T("w")); #endif if (action_file == 0) open_error(action_file_name); @@ -372,7 +381,7 @@ void open_files(void) #ifdef HAS_MKSTEMP entry_file = fdopen(entry_fd, "w"); #else - entry_file = fopen(entry_file_name, "w"); + entry_file = fopen_os(entry_file_name, T("w")); #endif if (entry_file == 0) open_error(entry_file_name); @@ -380,25 +389,25 @@ void open_files(void) #ifdef HAS_MKSTEMP text_file = fdopen(text_fd, "w"); #else - text_file = fopen(text_file_name, "w"); + text_file = fopen_os(text_file_name, T("w")); #endif if (text_file == 0) open_error(text_file_name); if (vflag) { - verbose_file = fopen(verbose_file_name, "w"); + verbose_file = fopen_os(verbose_file_name, T("w")); if (verbose_file == 0) open_error(verbose_file_name); } - output_file = fopen(output_file_name, "w"); + output_file = fopen_os(output_file_name, T("w")); if (output_file == 0) open_error(output_file_name); if (rflag) { - code_file = fopen(code_file_name, "w"); + code_file = fopen_os(code_file_name, T("w")); if (code_file == 0) open_error(code_file_name); } @@ -406,12 +415,16 @@ void open_files(void) code_file = output_file; - interface_file = fopen(interface_file_name, "w"); + interface_file = fopen_os(interface_file_name, T("w")); if (interface_file == 0) open_error(interface_file_name); } +#ifdef _WIN32 +int wmain(int argc, wchar_t **argv) +#else int main(int argc, char **argv) +#endif { set_signals(); getargs(argc, argv); diff --git a/yacc/output.c b/yacc/output.c index 4e871dec1..384890ae4 100644 --- a/yacc/output.c +++ b/yacc/output.c @@ -785,7 +785,7 @@ void output_stored_text(void) register FILE *in, *out; fclose(text_file); - text_file = fopen(text_file_name, "r"); + text_file = fopen_os(text_file_name, T("r")); if (text_file == NULL) open_error(text_file_name); in = text_file; @@ -802,7 +802,7 @@ void output_stored_text(void) putc(c, out); } if (!lflag) - fprintf(out, line_format, 
++outline + 1, code_file_name); + fprintf(out, line_format, ++outline + 1, code_file_name_disp); } @@ -855,7 +855,7 @@ void output_trailing_text(void) if (!lflag) { ++outline; - fprintf(out, line_format, lineno, input_file_name); + fprintf(out, line_format, lineno, input_file_name_disp); } if (c == '\n') ++outline; @@ -867,7 +867,7 @@ void output_trailing_text(void) if (!lflag) { ++outline; - fprintf(out, line_format, lineno, input_file_name); + fprintf(out, line_format, lineno, input_file_name_disp); } do { putc(c, out); } while ((c = *++cptr) != '\n'); ++outline; @@ -890,18 +890,18 @@ void output_trailing_text(void) putc('\n', out); } if (!lflag) - fprintf(out, line_format, ++outline + 1, code_file_name); + fprintf(out, line_format, ++outline + 1, code_file_name_disp); } -void copy_file(FILE **file, char *file_name) +void copy_file(FILE **file, char_os *file_name) { register int c, last; register FILE *out = code_file; int state = 0; fclose(*file); - *file = fopen(file_name, "r"); + *file = fopen_os(file_name, T("r")); if (*file == NULL) open_error(file_name); @@ -915,7 +915,7 @@ void copy_file(FILE **file, char *file_name) case ' ': state = (state == 2) ? 
3 : 0; break; case '0': if (state == 3){ - fprintf (out, "%d \"%s", outline+2, code_file_name); + fprintf (out, "%d \"%s", outline+2, code_file_name_disp); c = '"'; } state = 0; diff --git a/yacc/reader.c b/yacc/reader.c index e3dd17095..1b0a5f6b8 100644 --- a/yacc/reader.c +++ b/yacc/reader.c @@ -563,7 +563,7 @@ void copy_text(void) if (line == 0) unterminated_text(t_lineno, t_line, t_cptr); } - fprintf(f, line_format, lineno, input_file_name); + fprintf(f, line_format, lineno, input_file_name_disp); loop: c = *cptr++; @@ -1286,7 +1286,7 @@ void copy_action(void) item->name); } fprintf(f, " Obj.repr(\n"); - fprintf(f, line_format, lineno, input_file_name); + fprintf(f, line_format, lineno, input_file_name_disp); for (i = 0; i < cptr - line; i++) fputc(' ', f); fputc ('(', f); @@ -1820,8 +1820,8 @@ void print_grammar(void) void reader(void) { - virtual_input_file_name = substring (input_file_name, 0, - strlen (input_file_name)); + virtual_input_file_name = caml_stat_strdup_of_os(input_file_name); + if (!virtual_input_file_name) no_space(); create_symbol_table(); read_declarations(); output_token_type(); diff --git a/yacc/wstr.c b/yacc/wstr.c new file mode 100644 index 000000000..c22feeecc --- /dev/null +++ b/yacc/wstr.c @@ -0,0 +1,60 @@ +/**************************************************************************/ +/* */ +/* OCaml */ +/* */ +/* David Allsopp, OCaml Labs, Cambridge. */ +/* */ +/* Copyright 2017 MetaStack Solutions Ltd. */ +/* */ +/* All rights reserved. This file is distributed under the terms of */ +/* the GNU Lesser General Public License version 2.1, with the */ +/* special exception on linking described in the file LICENSE. 
*/
+/*                                                                        */
+/**************************************************************************/
+
+/* Need at least Windows Vista for WC_ERR_INVALID_CHARS */
+#define _WIN32_WINNT 0x600
+#define WINVER 0x600
+#include <stdlib.h>
+#include <windows.h>
+
+/* See corresponding values in runtime/win32.c */
+static int windows_unicode_enabled = WINDOWS_UNICODE;
+static int windows_unicode_strict = 1;
+
+/* Adapted from runtime/win32.c
+
+   Convert the wide string [s] to multi-byte text by calling
+   WideCharToMultiByte: with CP_UTF8 when windows_unicode_enabled is
+   non-zero (adding WC_ERR_INVALID_CHARS while windows_unicode_strict is
+   set), with CP_ACP otherwise.  [slen], [out] and [outlen] are handed to
+   WideCharToMultiByte unchanged.
+
+   Returns 0 without converting anything when [slen] is 0, -1 when
+   WideCharToMultiByte signals failure by returning 0, and the value
+   returned by WideCharToMultiByte otherwise. */
+int win_wide_char_to_multi_byte(const wchar_t *s, int slen,
+                                char *out, int outlen)
+{
+  int retcode;
+
+  if (slen == 0)
+    return 0;
+
+  if (windows_unicode_enabled != 0)
+    retcode =
+      WideCharToMultiByte(CP_UTF8,
+                          windows_unicode_strict ? WC_ERR_INVALID_CHARS : 0,
+                          s, slen, out, outlen, NULL, NULL);
+  else
+    retcode =
+      WideCharToMultiByte(CP_ACP, 0, s, slen, out, outlen, NULL, NULL);
+
+  if (retcode == 0)
+    return -1;
+
+  return retcode;
+}
+
+/* Return a freshly malloc'ed multi-byte copy of the NUL-terminated wide
+   string [s], converted by win_wide_char_to_multi_byte, or NULL when the
+   conversion or the allocation fails.  The caller owns the result. */
+char* caml_stat_strdup_of_utf16(const wchar_t *s)
+{
+  char *out;
+  int size;
+
+  /* A length of -1 makes WideCharToMultiByte treat [s] as NUL-terminated
+     and count the terminator in the size it reports. */
+  size = win_wide_char_to_multi_byte(s, -1, NULL, 0);
+  if (size < 0)
+    return NULL;
+
+  out = malloc(size);
+  if (out == NULL)
+    return NULL;
+
+  /* Never hand back a buffer that the conversion did not fill in. */
+  if (win_wide_char_to_multi_byte(s, -1, out, size) < 0) {
+    free(out);
+    return NULL;
+  }
+
+  return out;
+}