/****************************************************************************** Copyright (c) 2013 by Hugh Bailey This software is provided 'as-is', without any express or implied warranty. In no event will the authors be held liable for any damages arising from the use of this software. Permission is granted to anyone to use this software for any purpose, including commercial applications, and to alter it and redistribute it freely, subject to the following restrictions: 1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. 2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. 3. This notice may not be removed or altered from any source distribution. ******************************************************************************/ #include #include #include "platform.h" #include "cf-lexer.h" static inline void cf_convert_from_escape_literal(char **p_dst, const char **p_src) { char *dst = *p_dst; const char *src = *p_src; switch (*(src++)) { case '\'': *(dst++) = '\''; break; case '\"': *(dst++) = '\"'; break; case '\?': *(dst++) = '\?'; break; case '\\': *(dst++) = '\\'; break; case '0': *(dst++) = '\0'; break; case 'a': *(dst++) = '\a'; break; case 'b': *(dst++) = '\b'; break; case 'f': *(dst++) = '\f'; break; case 'n': *(dst++) = '\n'; break; case 'r': *(dst++) = '\r'; break; case 't': *(dst++) = '\t'; break; case 'v': *(dst++) = '\v'; break; /* hex */ case 'X': case 'x': *(dst++) = (char)strtoul(src, NULL, 16); src += 2; break; /* oct */ default: if (isdigit(*src)) { *(dst++) = (char)strtoul(src, NULL, 8); src += 3; } /* case 'u': case 'U': */ } *p_dst = dst; *p_src = src; } char *cf_literal_to_str(const char *literal, size_t count) { const char *temp_src; char *str, *temp_dst; if (!count) count = strlen(literal); if (count < 2) return NULL; if (literal[0] != literal[count-1]) return NULL; if (literal[0] != '\"' && literal[0] != '\'') return NULL; str = bmalloc(count - 1); temp_src = literal; temp_dst = str; while (*temp_src) { if (*temp_src == '\\') { temp_src++; cf_convert_from_escape_literal(&temp_dst, &temp_src); } else { *(temp_dst++) = *(temp_src++); } } *temp_dst = 0; return str; } static bool cf_is_token_break(struct base_token *start_token, const struct base_token *token) { switch (start_token->type) { case BASETOKEN_ALPHA: if (token->type == BASETOKEN_OTHER || token->type == BASETOKEN_WHITESPACE) return true; break; case BASETOKEN_DIGIT: if (token->type == BASETOKEN_WHITESPACE || (token->type == BASETOKEN_OTHER && *token->text.array != '.')) return true; break; case BASETOKEN_WHITESPACE: /* lump all non-newline whitespace together when possible */ if (is_space_or_tab(*start_token->text.array) && is_space_or_tab(*token->text.array)) break; return true; case BASETOKEN_OTHER: if (*start_token->text.array == '.' && token->type == BASETOKEN_DIGIT) { start_token->type = BASETOKEN_DIGIT; break; } case BASETOKEN_NONE: return true; } return false; } static inline bool cf_is_splice(const char *array) { return (*array == '\\' && is_newline(array[1])); } static inline void cf_pass_any_splices(const char **parray) { while (cf_is_splice(*parray)) *parray += 1 + newline_size((*parray)+1); } static inline bool cf_is_comment(const char *array) { const char *offset = array; if (*offset++ == '/') { cf_pass_any_splices(&offset); return (*offset == '*' || *offset == '/'); } return false; } static bool cf_lexer_process_comment(struct cf_lexer *lex, struct cf_token *out_token) { const char *offset; if (!cf_is_comment(out_token->unmerged_str.array)) return false; offset = lex->base_lexer.offset; cf_pass_any_splices(&offset); strcpy(lex->write_offset++, " "); out_token->str.len = 1; if (*offset == '/') { while (*++offset && !is_newline(*offset)) cf_pass_any_splices(&offset); } else if (*offset == '*') { bool was_star = false; lex->unexpected_eof = true; while (*++offset) { cf_pass_any_splices(&offset); if (was_star && *offset == '/') { offset++; lex->unexpected_eof = false; break; } else { was_star = (*offset == '*'); } } } out_token->unmerged_str.len += (size_t)(offset - out_token->unmerged_str.array); out_token->type = CFTOKEN_SPACETAB; lex->base_lexer.offset = offset; return true; } static inline void cf_lexer_write_strref(struct cf_lexer *lex, const struct strref *ref) { strncpy(lex->write_offset, ref->array, ref->len); lex->write_offset[ref->len] = 0; lex->write_offset += ref->len; } static bool cf_lexer_is_include(struct cf_lexer *lex) { bool found_include_import = false; bool found_preprocessor = false; size_t i; for (i = lex->tokens.num; i > 0; i--) { struct cf_token *token = lex->tokens.array+(i-1); if (is_space_or_tab(*token->str.array)) continue; if (!found_include_import) { if (strref_cmp(&token->str, "include") != 0 && strref_cmp(&token->str, "import") != 0) break; found_include_import = true; } else if (!found_preprocessor) { if (*token->str.array != '#') break; found_preprocessor = true; } else { return is_newline(*token->str.array); } } /* if starting line */ return found_preprocessor && found_include_import; } static void cf_lexer_getstrtoken(struct cf_lexer *lex, struct cf_token *out_token, char delimiter, bool allow_escaped_delimiters) { const char *offset = lex->base_lexer.offset; bool escaped = false; out_token->unmerged_str.len++; out_token->str.len++; cf_lexer_write_strref(lex, &out_token->unmerged_str); while (*offset) { cf_pass_any_splices(&offset); if (*offset == delimiter) { if (!escaped) { *lex->write_offset++ = *offset; out_token->str.len++; offset++; break; } } else if (is_newline(*offset)) { break; } *lex->write_offset++ = *offset; out_token->str.len++; escaped = (allow_escaped_delimiters && *offset == '\\'); offset++; } *lex->write_offset = 0; out_token->unmerged_str.len += (size_t)(offset - out_token->unmerged_str.array); out_token->type = CFTOKEN_STRING; lex->base_lexer.offset = offset; } static bool cf_lexer_process_string(struct cf_lexer *lex, struct cf_token *out_token) { char ch = *out_token->unmerged_str.array; if (ch == '<' && cf_lexer_is_include(lex)) { cf_lexer_getstrtoken(lex, out_token, '>', false); return true; } else if (ch == '"' || ch == '\'') { cf_lexer_getstrtoken(lex, out_token, ch, !cf_lexer_is_include(lex)); return true; } return false; } static inline enum cf_token_type cf_get_token_type(const struct cf_token *token, const struct base_token *start_token) { switch (start_token->type) { case BASETOKEN_ALPHA: return CFTOKEN_NAME; case BASETOKEN_DIGIT: return CFTOKEN_NUM; case BASETOKEN_WHITESPACE: if (is_newline(*token->str.array)) return CFTOKEN_NEWLINE; else return CFTOKEN_SPACETAB; case BASETOKEN_NONE: case BASETOKEN_OTHER: break; } return CFTOKEN_OTHER; } static bool cf_lexer_nexttoken(struct cf_lexer *lex, struct cf_token *out_token) { struct base_token token, start_token; bool wrote_data = false; base_token_clear(&token); base_token_clear(&start_token); cf_token_clear(out_token); while (lexer_getbasetoken(&lex->base_lexer, &token, PARSE_WHITESPACE)) { /* reclassify underscore as alpha for alnum tokens */ if (*token.text.array == '_') token.type = BASETOKEN_ALPHA; /* ignore escaped newlines to merge spliced lines */ if (cf_is_splice(token.text.array)) { lex->base_lexer.offset += newline_size(token.text.array+1); continue; } if (!wrote_data) { out_token->unmerged_str.array = token.text.array; out_token->str.array = lex->write_offset; /* if comment then output a space */ if (cf_lexer_process_comment(lex, out_token)) return true; /* process string tokens if any */ if (cf_lexer_process_string(lex, out_token)) return true; base_token_copy(&start_token, &token); wrote_data = true; } else if (cf_is_token_break(&start_token, &token)) { lex->base_lexer.offset -= token.text.len; break; } /* write token to CF lexer to account for splicing/comments */ cf_lexer_write_strref(lex, &token.text); out_token->str.len += token.text.len; } if (wrote_data) { out_token->unmerged_str.len = (size_t)(lex->base_lexer.offset - out_token->unmerged_str.array); out_token->type = cf_get_token_type(out_token, &start_token); } return wrote_data; } void cf_lexer_init(struct cf_lexer *lex) { lexer_init(&lex->base_lexer); da_init(lex->tokens); lex->file = NULL; lex->reformatted = NULL; lex->write_offset = NULL; lex->unexpected_eof = false; } void cf_lexer_free(struct cf_lexer *lex) { bfree(lex->file); bfree(lex->reformatted); lexer_free(&lex->base_lexer); da_free(lex->tokens); lex->file = NULL; lex->reformatted = NULL; lex->write_offset = NULL; lex->unexpected_eof = false; } bool cf_lexer_lex(struct cf_lexer *lex, const char *str, const char *file) { struct cf_token token; struct cf_token *last_token = NULL; cf_lexer_free(lex); if (!str || !*str) return false; if (file) lex->file = bstrdup(file); lexer_start(&lex->base_lexer, str); cf_token_clear(&token); lex->reformatted = bmalloc(strlen(str) + 1); lex->reformatted[0] = 0; lex->write_offset = lex->reformatted; while (cf_lexer_nexttoken(lex, &token)) { if (last_token && is_space_or_tab(*last_token->str.array) && is_space_or_tab(*token.str.array)) { cf_token_add(last_token, &token); continue; } token.lex = lex; last_token = da_push_back_new(lex->tokens); memcpy(last_token, &token, sizeof(struct cf_token)); } cf_token_clear(&token); token.str.array = lex->write_offset; token.unmerged_str.array = lex->base_lexer.offset; token.lex = lex; da_push_back(lex->tokens, &token); return !lex->unexpected_eof; } /* ------------------------------------------------------------------------- */ struct macro_param { struct cf_token name; DARRAY(struct cf_token) tokens; }; static inline void macro_param_init(struct macro_param *param) { cf_token_clear(¶m->name); da_init(param->tokens); } static inline void macro_param_free(struct macro_param *param) { cf_token_clear(¶m->name); da_free(param->tokens); } /* ------------------------------------------------------------------------- */ struct macro_params { DARRAY(struct macro_param) params; }; static inline void macro_params_init(struct macro_params *params) { da_init(params->params); } static inline void macro_params_free(struct macro_params *params) { size_t i; for (i = 0; i < params->params.num; i++) macro_param_free(params->params.array+i); da_free(params->params); } static inline struct macro_param *get_macro_param( const struct macro_params *params, const struct strref *name) { size_t i; if (!params) return NULL; for (i = 0; i < params->params.num; i++) { struct macro_param *param = params->params.array+i; if (strref_cmp_strref(¶m->name.str, name) == 0) return param; } return NULL; } /* ------------------------------------------------------------------------- */ static bool cf_preprocessor(struct cf_preprocessor *pp, bool if_block, struct cf_token **p_cur_token); static void cf_preprocess_tokens(struct cf_preprocessor *pp, bool if_block, struct cf_token **p_cur_token); static inline bool go_to_newline(struct cf_token **p_cur_token) { struct cf_token *cur_token = *p_cur_token; while (cur_token->type != CFTOKEN_NEWLINE && cur_token->type != CFTOKEN_NONE) cur_token++; *p_cur_token = cur_token; return cur_token->type != CFTOKEN_NONE; } static inline bool next_token(struct cf_token **p_cur_token, bool preprocessor) { struct cf_token *cur_token = *p_cur_token; if (cur_token->type != CFTOKEN_NONE) cur_token++; /* if preprocessor, stop at newline */ while (cur_token->type == CFTOKEN_SPACETAB && (preprocessor || cur_token->type == CFTOKEN_NEWLINE)) cur_token++; *p_cur_token = cur_token; return cur_token->type != CFTOKEN_NONE; } static inline void cf_gettokenoffset(struct cf_preprocessor *pp, const struct cf_token *token, uint32_t *row, uint32_t *col) { lexer_getstroffset(&pp->lex->base_lexer, token->unmerged_str.array, row, col); } static void cf_addew(struct cf_preprocessor *pp, const struct cf_token *token, const char *message, int error_level, const char *val1, const char *val2, const char *val3) { uint32_t row, col; cf_gettokenoffset(pp, token, &row, &col); if (!val1 && !val2 && !val3) { error_data_add(pp->ed, token->lex->file, row, col, message, error_level); } else { struct dstr formatted; dstr_init(&formatted); dstr_safe_printf(&formatted, message, val1, val2, val3, NULL); error_data_add(pp->ed, token->lex->file, row, col, formatted.array, error_level); dstr_free(&formatted); } } static inline void cf_adderror(struct cf_preprocessor *pp, const struct cf_token *token, const char *error, const char *val1, const char *val2, const char *val3) { cf_addew(pp, token, error, LEVEL_ERROR, val1, val2, val3); } static inline void cf_addwarning(struct cf_preprocessor *pp, const struct cf_token *token, const char *warning, const char *val1, const char *val2, const char *val3) { cf_addew(pp, token, warning, LEVEL_WARNING, val1, val2, val3); } static inline void cf_adderror_expecting(struct cf_preprocessor *pp, const struct cf_token *token, const char *expecting) { cf_adderror(pp, token, "Expected $1", expecting, NULL, NULL); } static inline void cf_adderror_expected_newline(struct cf_preprocessor *pp, const struct cf_token *token) { cf_adderror(pp, token, "Unexpected token after preprocessor, expected " "newline", NULL, NULL, NULL); } static inline void cf_adderror_unexpected_endif_eof(struct cf_preprocessor *pp, const struct cf_token *token) { cf_adderror(pp, token, "Unexpected end of file before #endif", NULL, NULL, NULL); } static inline void cf_adderror_unexpected_eof(struct cf_preprocessor *pp, const struct cf_token *token) { cf_adderror(pp, token, "Unexpected end of file", NULL, NULL, NULL); } static void cf_include_file(struct cf_preprocessor *pp, const struct cf_token *file_token) { struct cf_lexer new_lex; struct dstr str_file; FILE *file; char *file_data; struct cf_token *tokens; size_t i; dstr_init(&str_file); dstr_copy_strref(&str_file, &file_token->str); dstr_mid(&str_file, &str_file, 1, str_file.len-2); /* if dependency already exists, run preprocessor on it */ for (i = 0; i < pp->dependencies.num; i++) { struct cf_lexer *dep = pp->dependencies.array+i; if (strcmp(dep->file, str_file.array) == 0) { tokens = cf_lexer_gettokens(dep); cf_preprocess_tokens(pp, false, &tokens); goto exit; } } file = os_fopen(str_file.array, "rb"); if (!file) { cf_adderror(pp, file_token, "Could not open file '$1'", file_token->str.array, NULL, NULL); goto exit; } os_fread_utf8(file, &file_data); fclose(file); cf_lexer_init(&new_lex); cf_lexer_lex(&new_lex, file_data, str_file.array); tokens = cf_lexer_gettokens(&new_lex); cf_preprocess_tokens(pp, false, &tokens); bfree(file_data); da_push_back(pp->dependencies, &new_lex); exit: dstr_free(&str_file); } static inline bool is_sys_include(struct strref *ref) { return ref->len >= 2 && ref->array[0] == '<' && ref->array[ref->len-1] == '>'; } static inline bool is_loc_include(struct strref *ref) { return ref->len >= 2 && ref->array[0] == '"' && ref->array[ref->len-1] == '"'; } static void cf_preprocess_include(struct cf_preprocessor *pp, struct cf_token **p_cur_token) { struct cf_token *cur_token = *p_cur_token; if (pp->ignore_state) { go_to_newline(p_cur_token); return; } next_token(&cur_token, true); if (cur_token->type != CFTOKEN_STRING) { cf_adderror_expecting(pp, cur_token, "string"); go_to_newline(&cur_token); goto exit; } if (is_sys_include(&cur_token->str)) { /* TODO */ } else if (is_loc_include(&cur_token->str)) { if (!pp->ignore_state) cf_include_file(pp, cur_token); } else { cf_adderror(pp, cur_token, "Invalid or incomplete string", NULL, NULL, NULL); go_to_newline(&cur_token); goto exit; } cur_token++; exit: *p_cur_token = cur_token; } static bool cf_preprocess_macro_params(struct cf_preprocessor *pp, struct cf_def *def, struct cf_token **p_cur_token) { struct cf_token *cur_token = *p_cur_token; bool success = false; def->macro = true; do { next_token(&cur_token, true); if (cur_token->type != CFTOKEN_NAME) { cf_adderror_expecting(pp, cur_token, "identifier"); go_to_newline(&cur_token); goto exit; } cf_def_addparam(def, cur_token); next_token(&cur_token, true); if (cur_token->type != CFTOKEN_OTHER || (*cur_token->str.array != ',' && *cur_token->str.array != ')')) { cf_adderror_expecting(pp, cur_token, "',' or ')'"); go_to_newline(&cur_token); goto exit; } } while (*cur_token->str.array != ')'); /* ended properly, now go to first define token (or newline) */ next_token(&cur_token, true); success = true; exit: *p_cur_token = cur_token; return success; } #define INVALID_INDEX ((size_t)-1) static inline size_t cf_preprocess_get_def_idx(struct cf_preprocessor *pp, const struct strref *def_name) { struct cf_def *array = pp->defines.array; size_t i; for (i = 0; i < pp->defines.num; i++) { struct cf_def *cur_def = array+i; if (strref_cmp_strref(&cur_def->name.str, def_name) == 0) return i; } return INVALID_INDEX; } static inline struct cf_def *cf_preprocess_get_def(struct cf_preprocessor *pp, const struct strref *def_name) { size_t idx = cf_preprocess_get_def_idx(pp, def_name); if (idx == INVALID_INDEX) return NULL; return pp->defines.array+idx; } static char space_filler[2] = " "; static inline void append_space(struct cf_preprocessor *pp, struct darray *tokens, const struct cf_token *base) { struct cf_token token; strref_set(&token.str, space_filler, 1); token.type = CFTOKEN_SPACETAB; if (base) { token.lex = base->lex; strref_copy(&token.unmerged_str, &base->unmerged_str); } else { token.lex = pp->lex; strref_copy(&token.unmerged_str, &token.str); } darray_push_back(sizeof(struct cf_token), tokens, &token); } static inline void append_end_token(struct darray *tokens) { struct cf_token end; cf_token_clear(&end); darray_push_back(sizeof(struct cf_token), tokens, &end); } static void cf_preprocess_define(struct cf_preprocessor *pp, struct cf_token **p_cur_token) { struct cf_token *cur_token = *p_cur_token; struct cf_def def; if (pp->ignore_state) { go_to_newline(p_cur_token); return; } cf_def_init(&def); next_token(&cur_token, true); if (cur_token->type != CFTOKEN_NAME) { cf_adderror_expecting(pp, cur_token, "identifier"); go_to_newline(&cur_token); goto exit; } append_space(pp, &def.tokens.da, NULL); cf_token_copy(&def.name, cur_token); if (!next_token(&cur_token, true)) goto complete; /* process macro */ if (*cur_token->str.array == '(') { if (!cf_preprocess_macro_params(pp, &def, &cur_token)) goto error; } while (cur_token->type != CFTOKEN_NEWLINE && cur_token->type != CFTOKEN_NONE) cf_def_addtoken(&def, cur_token++); complete: append_end_token(&def.tokens.da); append_space(pp, &def.tokens.da, NULL); da_push_back(pp->defines, &def); goto exit; error: cf_def_free(&def); exit: *p_cur_token = cur_token; } static inline void cf_preprocess_remove_def_strref(struct cf_preprocessor *pp, const struct strref *ref) { size_t def_idx = cf_preprocess_get_def_idx(pp, ref); if (def_idx != INVALID_INDEX) { struct cf_def *array = pp->defines.array; cf_def_free(array+def_idx); da_erase(pp->defines, def_idx); } } static void cf_preprocess_undef(struct cf_preprocessor *pp, struct cf_token **p_cur_token) { struct cf_token *cur_token = *p_cur_token; if (pp->ignore_state) { go_to_newline(p_cur_token); return; } next_token(&cur_token, true); if (cur_token->type != CFTOKEN_NAME) { cf_adderror_expecting(pp, cur_token, "identifier"); go_to_newline(&cur_token); goto exit; } cf_preprocess_remove_def_strref(pp, &cur_token->str); cur_token++; exit: *p_cur_token = cur_token; } /* Processes an #ifdef/#ifndef/#if/#else/#elif sub block recursively */ static inline bool cf_preprocess_subblock(struct cf_preprocessor *pp, bool ignore, struct cf_token **p_cur_token) { bool eof; if (!next_token(p_cur_token, true)) return false; if (!pp->ignore_state) { pp->ignore_state = ignore; cf_preprocess_tokens(pp, true, p_cur_token); pp->ignore_state = false; } else { cf_preprocess_tokens(pp, true, p_cur_token); } eof = ((*p_cur_token)->type == CFTOKEN_NONE); if (eof) cf_adderror_unexpected_endif_eof(pp, *p_cur_token); return !eof; } static void cf_preprocess_ifdef(struct cf_preprocessor *pp, bool ifnot, struct cf_token **p_cur_token) { struct cf_token *cur_token = *p_cur_token; struct cf_def *def; bool is_true; next_token(&cur_token, true); if (cur_token->type != CFTOKEN_NAME) { cf_adderror_expecting(pp, cur_token, "identifier"); go_to_newline(&cur_token); goto exit; } def = cf_preprocess_get_def(pp, &cur_token->str); is_true = (def == NULL) == ifnot; if (!cf_preprocess_subblock(pp, !is_true, &cur_token)) goto exit; if (strref_cmp(&cur_token->str, "else") == 0) { if (!cf_preprocess_subblock(pp, is_true, &cur_token)) goto exit; /*} else if (strref_cmp(&cur_token->str, "elif") == 0) {*/ } cur_token++; exit: *p_cur_token = cur_token; } static bool cf_preprocessor(struct cf_preprocessor *pp, bool if_block, struct cf_token **p_cur_token) { struct cf_token *cur_token = *p_cur_token; if (strref_cmp(&cur_token->str, "include") == 0) { cf_preprocess_include(pp, p_cur_token); } else if (strref_cmp(&cur_token->str, "define") == 0) { cf_preprocess_define(pp, p_cur_token); } else if (strref_cmp(&cur_token->str, "undef") == 0) { cf_preprocess_undef(pp, p_cur_token); } else if (strref_cmp(&cur_token->str, "ifdef") == 0) { cf_preprocess_ifdef(pp, false, p_cur_token); } else if (strref_cmp(&cur_token->str, "ifndef") == 0) { cf_preprocess_ifdef(pp, true, p_cur_token); /*} else if (strref_cmp(&cur_token->str, "if") == 0) { TODO;*/ } else if (strref_cmp(&cur_token->str, "else") == 0 || /*strref_cmp(&cur_token->str, "elif") == 0 ||*/ strref_cmp(&cur_token->str, "endif") == 0) { if (!if_block) { struct dstr name; dstr_init_strref(&name, &cur_token->str); cf_adderror(pp, cur_token,"#$1 outside of " "#if/#ifdef/#ifndef block", name.array, NULL, NULL); dstr_free(&name); (*p_cur_token)++; return true; } return false; } else if (cur_token->type != CFTOKEN_NEWLINE && cur_token->type != CFTOKEN_NONE) { /* * TODO: language-specific preprocessor stuff should be sent to * handler of some sort */ (*p_cur_token)++; } return true; } static void cf_preprocess_addtoken(struct cf_preprocessor *pp, struct darray *dst, /* struct cf_token */ struct cf_token **p_cur_token, const struct cf_token *base, const struct macro_params *params); /* * collects tokens for a macro parameter * * note that it is important to make sure that any usage of function calls * within a macro parameter is preserved, example MACRO(func(1, 2), 3), do not * let it stop on the comma at "1," */ static void cf_preprocess_save_macro_param(struct cf_preprocessor *pp, struct cf_token **p_cur_token, struct macro_param *param, const struct cf_token *base, const struct macro_params *cur_params) { struct cf_token *cur_token = *p_cur_token; int brace_count = 0; append_space(pp, ¶m->tokens.da, base); while (cur_token->type != CFTOKEN_NONE) { if (*cur_token->str.array == '(') { brace_count++; } else if (*cur_token->str.array == ')') { if (brace_count) brace_count--; else break; } else if (*cur_token->str.array == ',') { if (!brace_count) break; } cf_preprocess_addtoken(pp, ¶m->tokens.da, &cur_token, base, cur_params); } if (cur_token->type == CFTOKEN_NONE) cf_adderror_unexpected_eof(pp, cur_token); append_space(pp, ¶m->tokens.da, base); append_end_token(¶m->tokens.da); *p_cur_token = cur_token; } static inline bool param_is_whitespace(const struct macro_param *param) { struct cf_token *array = param->tokens.array; size_t i; for (i = 0; i < param->tokens.num; i++) if (array[i].type != CFTOKEN_NONE && array[i].type != CFTOKEN_SPACETAB && array[i].type != CFTOKEN_NEWLINE) return false; return true; } /* collects parameter tokens of a used macro and stores them for the unwrap */ static void cf_preprocess_save_macro_params(struct cf_preprocessor *pp, struct cf_token **p_cur_token, const struct cf_def *def, const struct cf_token *base, const struct macro_params *cur_params, struct macro_params *dst) { struct cf_token *cur_token = *p_cur_token; size_t count = 0; next_token(&cur_token, false); if (cur_token->type != CFTOKEN_OTHER || *cur_token->str.array != '(') { cf_adderror_expecting(pp, cur_token, "'('"); goto exit; } do { struct macro_param param; macro_param_init(¶m); cur_token++; count++; cf_preprocess_save_macro_param(pp, &cur_token, ¶m, base, cur_params); if (cur_token->type != CFTOKEN_OTHER || (*cur_token->str.array != ',' && *cur_token->str.array != ')')) { macro_param_free(¶m); cf_adderror_expecting(pp, cur_token, "',' or ')'"); goto exit; } if (param_is_whitespace(¶m)) { /* if 0-param macro, ignore first entry */ if (count == 1 && !def->params.num && *cur_token->str.array == ')') { macro_param_free(¶m); break; } } if (count <= def->params.num) { cf_token_copy(¶m.name, cf_def_getparam(def, count-1)); da_push_back(dst->params, ¶m); } else { macro_param_free(¶m); } } while (*cur_token->str.array != ')'); if (count != def->params.num) cf_adderror(pp, cur_token, "Mismatching number of macro parameters", NULL, NULL, NULL); exit: *p_cur_token = cur_token; } static inline void cf_preprocess_unwrap_param(struct cf_preprocessor *pp, struct darray *dst, /* struct cf_token */ struct cf_token **p_cur_token, const struct cf_token *base, const struct macro_param *param) { struct cf_token *cur_token = *p_cur_token; struct cf_token *cur_param_token = param->tokens.array; while (cur_param_token->type != CFTOKEN_NONE) cf_preprocess_addtoken(pp, dst, &cur_param_token, base, NULL); cur_token++; *p_cur_token = cur_token; } static inline void cf_preprocess_unwrap_define(struct cf_preprocessor *pp, struct darray *dst, /* struct cf_token */ struct cf_token **p_cur_token, const struct cf_token *base, const struct cf_def *def, const struct macro_params *cur_params) { struct cf_token *cur_token = *p_cur_token; struct macro_params new_params; struct cf_token *cur_def_token = def->tokens.array; macro_params_init(&new_params); if (def->macro) cf_preprocess_save_macro_params(pp, &cur_token, def, base, cur_params, &new_params); while (cur_def_token->type != CFTOKEN_NONE) cf_preprocess_addtoken(pp, dst, &cur_def_token, base, &new_params); macro_params_free(&new_params); cur_token++; *p_cur_token = cur_token; } static void cf_preprocess_addtoken(struct cf_preprocessor *pp, struct darray *dst, /* struct cf_token */ struct cf_token **p_cur_token, const struct cf_token *base, const struct macro_params *params) { struct cf_token *cur_token = *p_cur_token; if (pp->ignore_state) goto ignore; if (!base) base = cur_token; if (cur_token->type == CFTOKEN_NAME) { struct cf_def *def; struct macro_param *param; param = get_macro_param(params, &cur_token->str); if (param) { cf_preprocess_unwrap_param(pp, dst, &cur_token, base, param); goto exit; } def = cf_preprocess_get_def(pp, &cur_token->str); if (def) { cf_preprocess_unwrap_define(pp, dst, &cur_token, base, def, params); goto exit; } } darray_push_back(sizeof(struct cf_token), dst, cur_token); ignore: cur_token++; exit: *p_cur_token = cur_token; } static void cf_preprocess_tokens(struct cf_preprocessor *pp, bool if_block, struct cf_token **p_cur_token) { bool newline = true; bool preprocessor_line = if_block; struct cf_token *cur_token = *p_cur_token; while (cur_token->type != CFTOKEN_NONE) { if(cur_token->type != CFTOKEN_SPACETAB && cur_token->type != CFTOKEN_NEWLINE) { if (preprocessor_line) { cf_adderror_expected_newline(pp, cur_token); if (!go_to_newline(&cur_token)) break; } if (newline && *cur_token->str.array == '#') { next_token(&cur_token, true); preprocessor_line = true; if (!cf_preprocessor(pp, if_block, &cur_token)) break; continue; } newline = false; } if (cur_token->type == CFTOKEN_NEWLINE) { newline = true; preprocessor_line = false; } else if (cur_token->type == CFTOKEN_NONE) { break; } cf_preprocess_addtoken(pp, &pp->tokens.da, &cur_token, NULL, NULL); } *p_cur_token = cur_token; } void cf_preprocessor_init(struct cf_preprocessor *pp) { da_init(pp->defines); da_init(pp->sys_include_dirs); da_init(pp->dependencies); da_init(pp->tokens); pp->lex = NULL; pp->ed = NULL; pp->ignore_state = false; } void cf_preprocessor_free(struct cf_preprocessor *pp) { struct cf_lexer *dependencies = pp->dependencies.array; char **sys_include_dirs = pp->sys_include_dirs.array; struct cf_def *defs = pp->defines.array; size_t i; for (i = 0; i defines.num; i++) cf_def_free(defs+i); for (i = 0; i < pp->sys_include_dirs.num; i++) bfree(sys_include_dirs[i]); for (i = 0; i < pp->dependencies.num; i++) cf_lexer_free(dependencies+i); da_free(pp->defines); da_free(pp->sys_include_dirs); da_free(pp->dependencies); da_free(pp->tokens); pp->lex = NULL; pp->ed = NULL; pp->ignore_state = false; } bool cf_preprocess(struct cf_preprocessor *pp, struct cf_lexer *lex, struct error_data *ed) { struct cf_token *token = cf_lexer_gettokens(lex); if (!token) return false; pp->ed = ed; pp->lex = lex; cf_preprocess_tokens(pp, false, &token); da_push_back(pp->tokens, token); return !lex->unexpected_eof; } void cf_preprocessor_add_def(struct cf_preprocessor *pp, struct cf_def *def) { struct cf_def *existing = cf_preprocess_get_def(pp, &def->name.str); if (existing) { struct dstr name; dstr_init_strref(&name, &def->name.str); cf_addwarning(pp, &def->name, "Token $1 already defined", name.array, NULL, NULL); cf_addwarning(pp, &existing->name, "Previous definition of $1 is here", name.array, NULL, NULL); cf_def_free(existing); memcpy(existing, def, sizeof(struct cf_def)); } else { da_push_back(pp->defines, &def); } } void cf_preprocessor_remove_def(struct cf_preprocessor *pp, const char *def_name) { struct strref ref; ref.array = def_name; ref.len = strlen(def_name); cf_preprocess_remove_def_strref(pp, &ref); }