LIBS: updated simplecpp
parent
7d6cadcfa9
commit
8940d77443
|
@ -23,6 +23,7 @@
|
|||
#include "simplecpp.h"
|
||||
|
||||
#include <algorithm>
|
||||
#include <climits>
|
||||
#include <cstdlib>
|
||||
#include <cstring>
|
||||
#include <exception>
|
||||
|
@ -1503,8 +1504,11 @@ namespace simplecpp {
|
|||
expanded = true;
|
||||
}
|
||||
}
|
||||
if (!expanded)
|
||||
if (!expanded) {
|
||||
tokens->push_back(new Token(*tok));
|
||||
if (tok->macro.empty() && (par > 0 || tok->str() != "("))
|
||||
tokens->back()->macro = name();
|
||||
}
|
||||
}
|
||||
|
||||
if (tok->op == '(')
|
||||
|
@ -1608,7 +1612,14 @@ namespace simplecpp {
|
|||
if (sameline(tok, tok->next) && tok->next && tok->next->op == '#' && tok->next->next && tok->next->next->op == '#') {
|
||||
if (!sameline(tok, tok->next->next->next))
|
||||
throw invalidHashHash(tok->location, name());
|
||||
output->push_back(newMacroToken(expandArgStr(tok, parametertokens2), loc, isReplaced(expandedmacros)));
|
||||
TokenList new_output(files);
|
||||
if (!expandArg(&new_output, tok, parametertokens2))
|
||||
output->push_back(newMacroToken(tok->str(), loc, isReplaced(expandedmacros)));
|
||||
else if (new_output.empty()) // placemarker token
|
||||
output->push_back(newMacroToken("", loc, isReplaced(expandedmacros)));
|
||||
else
|
||||
for (const Token *tok2 = new_output.cfront(); tok2; tok2 = tok2->next)
|
||||
output->push_back(newMacroToken(tok2->str(), loc, isReplaced(expandedmacros)));
|
||||
tok = tok->next;
|
||||
} else {
|
||||
tok = expandToken(output, loc, tok, macros, expandedmacros, parametertokens2);
|
||||
|
@ -1804,29 +1815,13 @@ namespace simplecpp {
|
|||
partok = it->second.expand(output, loc, partok, macros, expandedmacros);
|
||||
else {
|
||||
output->push_back(newMacroToken(partok->str(), loc, isReplaced(expandedmacros)));
|
||||
output->back()->macro = partok->macro;
|
||||
partok = partok->next;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get string for token. If token is argument, the expanded string is returned.
|
||||
* @param tok The token
|
||||
* @param parametertokens parameters given when expanding this macro
|
||||
* @return string
|
||||
*/
|
||||
std::string expandArgStr(const Token *tok, const std::vector<const Token *> ¶metertokens) const {
|
||||
TokenList tokens(files);
|
||||
if (expandArg(&tokens, tok, parametertokens)) {
|
||||
std::string s;
|
||||
for (const Token *tok2 = tokens.cfront(); tok2; tok2 = tok2->next)
|
||||
s += tok2->str();
|
||||
return s;
|
||||
}
|
||||
return tok->str();
|
||||
}
|
||||
|
||||
/**
|
||||
* Expand #X => "X"
|
||||
* @param output destination tokenlist
|
||||
|
@ -2304,6 +2299,253 @@ static void simplifyName(simplecpp::TokenList &expr)
|
|||
}
|
||||
}
|
||||
|
||||
/*
 * Reads at least minlen and at most maxlen digits (inc. prefix) in base base
 * from s starting at position pos and converts them to an
 * unsigned long long value, updating pos to point to the first
 * unused element of s.
 *
 * The number has to begin exactly at pos: leading whitespace and a leading
 * '+' or '-' sign (both of which std::strtoull would silently accept) are
 * treated as "no digits read".
 *
 * Returns ULLONG_MAX if the result is not representable and
 * throws std::runtime_error if the above requirements were not possible
 * to satisfy.
 */
static unsigned long long stringToULLbounded(
    const std::string& s,
    std::size_t& pos,
    int base = 0,
    std::ptrdiff_t minlen = 1,
    std::size_t maxlen = std::string::npos
)
{
    // Work on a copy so that strtoull cannot read past the maxlen limit.
    const std::string sub = s.substr(pos, maxlen);
    const char * const start = sub.c_str();

    unsigned long long value = 0;
    std::ptrdiff_t consumed = 0;

    // Every number strtoull can parse starts with an (ASCII) digit or letter
    // once optional whitespace and sign are taken away. Escape sequences must
    // not contain whitespace or a sign, so only convert when the very first
    // character can start the digit sequence.
    const char first = *start;
    const bool startsNumber = (first >= '0' && first <= '9')
                              || (first >= 'a' && first <= 'z')
                              || (first >= 'A' && first <= 'Z');
    if (startsNumber) {
        char *end;
        value = std::strtoull(start, &end, base);
        consumed = end - start;
    }

    pos += consumed;
    if (consumed < minlen)
        throw std::runtime_error("expected digit");
    return value;
}
|
||||
|
||||
/* Converts character literal (including prefix, but not ud-suffix)
|
||||
* to long long value.
|
||||
*
|
||||
* Assumes ASCII-compatible single-byte encoded str for narrow literals
|
||||
* and UTF-8 otherwise.
|
||||
*
|
||||
* For target assumes
|
||||
* - execution character set encoding matching str
|
||||
* - UTF-32 execution wide-character set encoding
|
||||
* - requirements for __STDC_UTF_16__, __STDC_UTF_32__ and __STDC_ISO_10646__ satisfied
|
||||
* - char16_t is 16bit wide
|
||||
* - char32_t is 32bit wide
|
||||
* - wchar_t is 32bit wide and unsigned
|
||||
* - matching char signedness to host
|
||||
* - matching sizeof(int) to host
|
||||
*
|
||||
* For host assumes
|
||||
* - ASCII-compatible execution character set
|
||||
*
|
||||
* For host and target assumes
|
||||
* - CHAR_BIT == 8
|
||||
* - two's complement
|
||||
*
|
||||
* Implements multi-character narrow literals according to GCC's behavior,
|
||||
* except multi code unit universal character names are not supported.
|
||||
* Multi-character wide literals are not supported.
|
||||
* Limited support of universal character names for non-UTF-8 execution character set encodings.
|
||||
*/
|
||||
long long simplecpp::characterLiteralToLL(const std::string& str)
|
||||
{
|
||||
// default is wide/utf32
|
||||
bool narrow = false;
|
||||
bool utf8 = false;
|
||||
bool utf16 = false;
|
||||
|
||||
std::size_t pos;
|
||||
|
||||
if (str.size() >= 1 && str[0] == '\'') {
|
||||
narrow = true;
|
||||
pos = 1;
|
||||
} else if (str.size() >= 2 && str[0] == 'u' && str[1] == '\'') {
|
||||
utf16 = true;
|
||||
pos = 2;
|
||||
} else if (str.size() >= 3 && str[0] == 'u' && str[1] == '8' && str[2] == '\'') {
|
||||
utf8 = true;
|
||||
pos = 3;
|
||||
} else if (str.size() >= 2 && (str[0] == 'L' || str[0] == 'U') && str[1] == '\'') {
|
||||
pos = 2;
|
||||
} else
|
||||
throw std::runtime_error("expected a character literal");
|
||||
|
||||
unsigned long long multivalue = 0;
|
||||
|
||||
std::size_t nbytes = 0;
|
||||
|
||||
while (pos + 1 < str.size()) {
|
||||
if (str[pos] == '\'' || str[pos] == '\n')
|
||||
throw std::runtime_error("raw single quotes and newlines not allowed in character literals");
|
||||
|
||||
if (nbytes >= 1 && !narrow)
|
||||
throw std::runtime_error("multiple characters only supported in narrow character literals");
|
||||
|
||||
unsigned long long value;
|
||||
|
||||
if (str[pos] == '\\') {
|
||||
pos++;
|
||||
char escape = str[pos++];
|
||||
|
||||
if (pos >= str.size())
|
||||
throw std::runtime_error("unexpected end of character literal");
|
||||
|
||||
switch (escape) {
|
||||
// obscure GCC extensions
|
||||
case '%':
|
||||
case '(':
|
||||
case '[':
|
||||
case '{':
|
||||
// standard escape sequences
|
||||
case '\'':
|
||||
case '"':
|
||||
case '?':
|
||||
case '\\':
|
||||
value = static_cast<unsigned char>(escape);
|
||||
break;
|
||||
|
||||
case 'a':
|
||||
value = static_cast<unsigned char>('\a');
|
||||
break;
|
||||
case 'b':
|
||||
value = static_cast<unsigned char>('\b');
|
||||
break;
|
||||
case 'f':
|
||||
value = static_cast<unsigned char>('\f');
|
||||
break;
|
||||
case 'n':
|
||||
value = static_cast<unsigned char>('\n');
|
||||
break;
|
||||
case 'r':
|
||||
value = static_cast<unsigned char>('\r');
|
||||
break;
|
||||
case 't':
|
||||
value = static_cast<unsigned char>('\t');
|
||||
break;
|
||||
case 'v':
|
||||
value = static_cast<unsigned char>('\v');
|
||||
break;
|
||||
|
||||
// GCC extension for ESC character
|
||||
case 'e':
|
||||
case 'E':
|
||||
value = static_cast<unsigned char>('\x1b');
|
||||
break;
|
||||
|
||||
case '0':
|
||||
case '1':
|
||||
case '2':
|
||||
case '3':
|
||||
case '4':
|
||||
case '5':
|
||||
case '6':
|
||||
case '7':
|
||||
// octal escape sequences consist of 1 to 3 digits
|
||||
value = stringToULLbounded(str, --pos, 8, 1, 3);
|
||||
break;
|
||||
|
||||
case 'x':
|
||||
// hexadecimal escape sequences consist of at least 1 digit
|
||||
value = stringToULLbounded(str, pos, 16);
|
||||
break;
|
||||
|
||||
case 'u':
|
||||
case 'U': {
|
||||
// universal character names have exactly 4 or 8 digits
|
||||
std::size_t ndigits = (escape == 'u' ? 4 : 8);
|
||||
value = stringToULLbounded(str, pos, 16, ndigits, ndigits);
|
||||
|
||||
// UTF-8 encodes code points above 0x7f in multiple code units
|
||||
// code points above 0x10ffff are not allowed
|
||||
if (((narrow || utf8) && value > 0x7f) || (utf16 && value > 0xffff) || value > 0x10ffff)
|
||||
throw std::runtime_error("code point too large");
|
||||
|
||||
if (value >= 0xd800 && value <= 0xdfff)
|
||||
throw std::runtime_error("surrogate code points not allowed in universal character names");
|
||||
|
||||
break;
|
||||
}
|
||||
|
||||
default:
|
||||
throw std::runtime_error("invalid escape sequence");
|
||||
}
|
||||
} else {
|
||||
value = static_cast<unsigned char>(str[pos++]);
|
||||
|
||||
if (!narrow && value >= 0x80) {
|
||||
// Assuming this is a UTF-8 encoded code point.
|
||||
// This decoder may not completely validate the input.
|
||||
// Noncharacters are neither rejected nor replaced.
|
||||
|
||||
int additional_bytes;
|
||||
if (value >= 0xf5) // higher values would result in code points above 0x10ffff
|
||||
throw std::runtime_error("assumed UTF-8 encoded source, but sequence is invalid");
|
||||
else if (value >= 0xf0)
|
||||
additional_bytes = 3;
|
||||
else if (value >= 0xe0)
|
||||
additional_bytes = 2;
|
||||
else if (value >= 0xc2) // 0xc0 and 0xc1 are always overlong 2-bytes encodings
|
||||
additional_bytes = 1;
|
||||
else
|
||||
throw std::runtime_error("assumed UTF-8 encoded source, but sequence is invalid");
|
||||
|
||||
value &= (1 << (6 - additional_bytes)) - 1;
|
||||
|
||||
while (additional_bytes--) {
|
||||
if (pos + 1 >= str.size())
|
||||
throw std::runtime_error("assumed UTF-8 encoded source, but character literal ends unexpectedly");
|
||||
|
||||
unsigned char c = str[pos++];
|
||||
|
||||
if (((c >> 6) != 2) // ensure c has form 0xb10xxxxxx
|
||||
|| (!value && additional_bytes == 1 && c < 0xa0) // overlong 3-bytes encoding
|
||||
|| (!value && additional_bytes == 2 && c < 0x90)) // overlong 4-bytes encoding
|
||||
throw std::runtime_error("assumed UTF-8 encoded source, but sequence is invalid");
|
||||
|
||||
value = (value << 6) | (c & ((1 << 7) - 1));
|
||||
}
|
||||
|
||||
if (value >= 0xd800 && value <= 0xdfff)
|
||||
throw std::runtime_error("assumed UTF-8 encoded source, but sequence is invalid");
|
||||
|
||||
if ((utf8 && value > 0x7f) || (utf16 && value > 0xffff) || value > 0x10ffff)
|
||||
throw std::runtime_error("code point too large");
|
||||
}
|
||||
}
|
||||
|
||||
if (((narrow || utf8) && value > std::numeric_limits<unsigned char>::max()) || (utf16 && value >> 16) || value >> 32)
|
||||
throw std::runtime_error("numeric escape sequence too large");
|
||||
|
||||
multivalue <<= CHAR_BIT;
|
||||
multivalue |= value;
|
||||
nbytes++;
|
||||
}
|
||||
|
||||
if (pos + 1 != str.size() || str[pos] != '\'')
|
||||
throw std::runtime_error("missing closing quote in character literal");
|
||||
|
||||
if (!nbytes)
|
||||
throw std::runtime_error("empty character literal");
|
||||
|
||||
// ordinary narrow character literal's value is determined by (possibly signed) char
|
||||
if (narrow && nbytes == 1)
|
||||
return static_cast<char>(multivalue);
|
||||
|
||||
// while multi-character literal's value is determined by (signed) int
|
||||
if (narrow)
|
||||
return static_cast<int>(multivalue);
|
||||
|
||||
// All other cases are unsigned. Since long long is at least 64bit wide,
|
||||
// while the literals at most 32bit wide, the conversion preserves all values.
|
||||
return multivalue;
|
||||
}
|
||||
|
||||
static void simplifyNumbers(simplecpp::TokenList &expr)
|
||||
{
|
||||
for (simplecpp::Token *tok = expr.front(); tok; tok = tok->next) {
|
||||
|
@ -2311,8 +2553,8 @@ static void simplifyNumbers(simplecpp::TokenList &expr)
|
|||
continue;
|
||||
if (tok->str().compare(0,2,"0x") == 0)
|
||||
tok->setstr(toString(stringToULL(tok->str())));
|
||||
else if (tok->str()[0] == '\'')
|
||||
tok->setstr(toString(tok->str()[1] & 0xffU));
|
||||
else if (!tok->number && tok->str().find('\'') != tok->str().npos)
|
||||
tok->setstr(toString(simplecpp::characterLiteralToLL(tok->str())));
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -2562,7 +2804,7 @@ static bool preprocessToken(simplecpp::TokenList &output, const simplecpp::Token
|
|||
return true;
|
||||
}
|
||||
|
||||
void simplecpp::preprocess(simplecpp::TokenList &output, const simplecpp::TokenList &rawtokens, std::vector<std::string> &files, std::map<std::string, simplecpp::TokenList *> &filedata, const simplecpp::DUI &dui, simplecpp::OutputList *outputList, std::list<simplecpp::MacroUsage> *macroUsage)
|
||||
void simplecpp::preprocess(simplecpp::TokenList &output, const simplecpp::TokenList &rawtokens, std::vector<std::string> &files, std::map<std::string, simplecpp::TokenList *> &filedata, const simplecpp::DUI &dui, simplecpp::OutputList *outputList, std::list<simplecpp::MacroUsage> *macroUsage, std::list<simplecpp::IfCond> *ifCond)
|
||||
{
|
||||
std::map<std::string, std::size_t> sizeOfType(rawtokens.sizeOfType);
|
||||
sizeOfType.insert(std::make_pair("char", sizeof(char)));
|
||||
|
@ -2877,7 +3119,17 @@ void simplecpp::preprocess(simplecpp::TokenList &output, const simplecpp::TokenL
|
|||
tok = tmp->previous;
|
||||
}
|
||||
try {
|
||||
conditionIsTrue = (evaluate(expr, sizeOfType) != 0);
|
||||
if (ifCond) {
|
||||
std::string E;
|
||||
for (const simplecpp::Token *tok = expr.cfront(); tok; tok = tok->next)
|
||||
E += (E.empty() ? "" : " ") + tok->str();
|
||||
const long long result = evaluate(expr, sizeOfType);
|
||||
conditionIsTrue = (result != 0);
|
||||
ifCond->push_back(IfCond(rawtok->location, E, result));
|
||||
} else {
|
||||
const long long result = evaluate(expr, sizeOfType);
|
||||
conditionIsTrue = (result != 0);
|
||||
}
|
||||
} catch (const std::exception &e) {
|
||||
if (outputList) {
|
||||
Output out(rawtok->location.files);
|
||||
|
|
|
@ -108,7 +108,8 @@ namespace simplecpp {
|
|||
}
|
||||
|
||||
void flags() {
|
||||
name = (std::isalpha((unsigned char)string[0]) || string[0] == '_' || string[0] == '$');
|
||||
name = (std::isalpha((unsigned char)string[0]) || string[0] == '_' || string[0] == '$')
|
||||
&& (string.find('\'') == string.npos);
|
||||
comment = string.size() > 1U && string[0] == '/' && (string[1] == '/' || string[1] == '*');
|
||||
number = std::isdigit((unsigned char)string[0]) || (string.size() > 1U && string[0] == '-' && std::isdigit((unsigned char)string[1]));
|
||||
op = (string.size() == 1U) ? string[0] : '\0';
|
||||
|
@ -287,6 +288,14 @@ namespace simplecpp {
|
|||
bool macroValueKnown;
|
||||
};
|
||||
|
||||
/** Tracking #if/#elif expressions */
|
||||
struct SIMPLECPP_LIB IfCond {
|
||||
explicit IfCond(const Location& location, const std::string &E, long long result) : location(location), E(E), result(result) {}
|
||||
Location location; // location of #if/#elif
|
||||
std::string E; // preprocessed condition
|
||||
long long result; // condition result
|
||||
};
|
||||
|
||||
/**
|
||||
* Command line preprocessor settings.
|
||||
* On the command line these are configured by -D, -U, -I, --include, -std
|
||||
|
@ -300,6 +309,8 @@ namespace simplecpp {
|
|||
std::string std;
|
||||
};
|
||||
|
||||
SIMPLECPP_LIB long long characterLiteralToLL(const std::string& str);
|
||||
|
||||
SIMPLECPP_LIB std::map<std::string, TokenList*> load(const TokenList &rawtokens, std::vector<std::string> &filenames, const DUI &dui, OutputList *outputList = NULL);
|
||||
|
||||
/**
|
||||
|
@ -312,8 +323,9 @@ namespace simplecpp {
|
|||
* @param dui defines, undefs, and include paths
|
||||
* @param outputList output: list that will receive output messages
|
||||
* @param macroUsage output: macro usage
|
||||
* @param ifCond output: #if/#elif expressions
|
||||
*/
|
||||
SIMPLECPP_LIB void preprocess(TokenList &output, const TokenList &rawtokens, std::vector<std::string> &files, std::map<std::string, TokenList*> &filedata, const DUI &dui, OutputList *outputList = NULL, std::list<MacroUsage> *macroUsage = NULL);
|
||||
SIMPLECPP_LIB void preprocess(TokenList &output, const TokenList &rawtokens, std::vector<std::string> &files, std::map<std::string, TokenList*> &filedata, const DUI &dui, OutputList *outputList = NULL, std::list<MacroUsage> *macroUsage = NULL, std::list<IfCond> *ifCond = NULL);
|
||||
|
||||
/**
|
||||
* Deallocate data
|
||||
|
|
Loading…
Reference in New Issue