From 2c9c9b199854a196819365590f86a5f1a37daa6b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Enrico=20Tr=C3=B6ger?= Date: Tue, 22 Jul 2008 13:06:11 +0000 Subject: [PATCH] Backport latest HTML/PHP lexer fixes from Scintilla CVS (#2024387). git-svn-id: https://geany.svn.sourceforge.net/svnroot/geany/trunk@2799 ea778897-0a13-0410-b9d1-a72fbfd435f5 --- ChangeLog | 7 ++ scintilla/LexHTML.cxx | 241 ++++++++++++++++++++++++++++-------------- 2 files changed, 168 insertions(+), 80 deletions(-) diff --git a/ChangeLog b/ChangeLog index e873b551..0240f195 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,9 @@ +2008-07-22 Enrico Tröger + + * scintilla/LexHTML.cxx: + Backport latest HTML/PHP lexer fixes from Scintilla CVS (#2024387). + + 2008-07-21 Nick Treleaven * src/main.c, src/socket.c, src/main.h: @@ -21,6 +27,7 @@ Move code to reload configuration files into utils_reload_configuration() and add it to the plugin API. + 2008-07-18 Enrico Tröger * plugins/classbuilder.c, plugins/demoplugin.c, plugins/export.c, diff --git a/scintilla/LexHTML.cxx b/scintilla/LexHTML.cxx index 9b8d5dc9..c0a47d9e 100644 --- a/scintilla/LexHTML.cxx +++ b/scintilla/LexHTML.cxx @@ -29,7 +29,7 @@ using namespace Scintilla; #define SCE_HA_VBS (SCE_HBA_START - SCE_HB_START) #define SCE_HA_PYTHON (SCE_HPA_START - SCE_HP_START) -enum script_type { eScriptNone = 0, eScriptJS, eScriptVBS, eScriptPython, eScriptPHP, eScriptXML, eScriptSGML, eScriptSGMLblock }; +enum script_type { eScriptNone = 0, eScriptJS, eScriptVBS, eScriptPython, eScriptPHP, eScriptXML, eScriptSGML, eScriptSGMLblock, eScriptComment }; enum script_mode { eHtml = 0, eNonHtmlScript, eNonHtmlPreProc, eNonHtmlScriptPreProc }; static inline bool IsAWordChar(const int ch) { @@ -249,7 +249,7 @@ static void classifyAttribHTML(unsigned int start, unsigned int end, WordList &k static int classifyTagHTML(unsigned int start, unsigned int end, WordList &keywords, Accessor &styler, bool &tagDontFold, - bool caseSensitive, bool isXml) { + bool 
caseSensitive, bool isXml, bool allowScripts) { char s[30 + 2]; // Copy after the '<' unsigned int i = 0; @@ -268,31 +268,28 @@ static int classifyTagHTML(unsigned int start, unsigned int end, // if the current language is XML, I can fold any tag // if the current language is HTML, I don't want to fold certain tags (input, meta, etc.) //...to find it in the list of no-container-tags - tagDontFold = (!isXml) && (NULL != strstr("meta link img area br hr input ",s)); + tagDontFold = (!isXml) && (NULL != strstr("meta link img area br hr input ", s)); //now we can remove the trailing space s[i] = '\0'; - bool isScript = false; + // No keywords -> all are known + // Name of a closing tag starts at s + 1 char chAttr = SCE_H_TAGUNKNOWN; if (s[0] == '!') { chAttr = SCE_H_SGML_DEFAULT; - } else if (s[0] == '/') { // Closing tag - if (keywords.InList(s + 1)) - chAttr = SCE_H_TAG; - } else { - if (keywords.InList(s)) { - chAttr = SCE_H_TAG; - isScript = 0 == strcmp(s, "script"); - } - } - if ((chAttr == SCE_H_TAGUNKNOWN) && !keywords) { - // No keywords -> all are known + } else if (!keywords || keywords.InList(s[0] == '/' ? s + 1 : s)) { chAttr = SCE_H_TAG; - isScript = 0 == strcmp(s, "script"); } styler.ColourTo(end, chAttr); - return isScript ? 
SCE_H_SCRIPT : chAttr; + if (chAttr == SCE_H_TAG) { + if (allowScripts && 0 == strcmp(s, "script")) { + chAttr = SCE_H_SCRIPT; + } else if (!isXml && 0 == strcmp(s, "comment")) { + chAttr = SCE_H_COMMENT; + } + } + return chAttr; } static void classifyWordHTJS(unsigned int start, unsigned int end, @@ -411,6 +408,9 @@ static int StateForScript(script_type scriptLanguage) { case eScriptSGML: Result = SCE_H_SGML_DEFAULT; break; + case eScriptComment: + Result = SCE_H_COMMENT; + break; default : Result = SCE_HJ_START; break; @@ -469,19 +469,51 @@ static bool isPHPStringState(int state) { (state == SCE_HPHP_COMPLEX_VARIABLE); } -static int FindPhpStringDelimiter(char *phpStringDelimiter, const int phpStringDelimiterSize, int i, const int lengthDoc, Accessor &styler) { +static int FindPhpStringDelimiter(char *phpStringDelimiter, const int phpStringDelimiterSize, int i, const int lengthDoc, Accessor &styler, bool &isSimpleString) { int j; + const int beginning = i - 1; + bool isValidSimpleString = false; + while (i < lengthDoc && (styler[i] == ' ' || styler[i] == '\t')) i++; - phpStringDelimiter[0] = '\n'; - for (j = i; j < lengthDoc && styler[j] != '\n' && styler[j] != '\r'; j++) { + + char ch = styler.SafeGetCharAt(i); + const char chNext = styler.SafeGetCharAt(i + 1); + if (!IsPhpWordStart(ch)) { + if (ch == '\'' && IsPhpWordStart(chNext)) { + i++; + ch = chNext; + isSimpleString = true; + } else { + phpStringDelimiter[0] = '\0'; + return beginning; + } + } + phpStringDelimiter[0] = ch; + i++; + + for (j = i; j < lengthDoc && !isLineEnd(styler[j]); j++) { + if (!IsPhpWordChar(styler[j])) { + if (isSimpleString && (styler[j] == '\'') && isLineEnd(styler.SafeGetCharAt(j + 1))) { + isValidSimpleString = true; + j++; + break; + } else { + phpStringDelimiter[0] = '\0'; + return beginning; + } + } if (j - i < phpStringDelimiterSize - 2) phpStringDelimiter[j-i+1] = styler[j]; else i++; } - phpStringDelimiter[j-i+1] = '\0'; - return j; + if (isSimpleString && 
!isValidSimpleString) { + phpStringDelimiter[0] = '\0'; + return beginning; + } + phpStringDelimiter[j-i+1 - (isSimpleString ? 1 : 0)] = '\0'; + return j - 1; } static void ColouriseHyperTextDoc(unsigned int startPos, int length, int initStyle, WordList *keywordlists[], @@ -510,11 +542,15 @@ static void ColouriseHyperTextDoc(unsigned int startPos, int length, int initSty } state = SCE_H_DEFAULT; } - // String can be heredoc, must find a delimiter first - while (startPos > 0 && isPHPStringState(state) && state != SCE_HPHP_SIMPLESTRING) { - startPos--; - length++; - state = styler.StyleAt(startPos); + // String can be heredoc, must find a delimiter first. Reread from beginning of line containing the string, to get the correct lineState + if (isPHPStringState(state)) { + while (startPos > 0 && (isPHPStringState(state) || !isLineEnd(styler[startPos - 1]))) { + startPos--; + length++; + state = styler.StyleAt(startPos); + } + if (startPos == 0) + state = SCE_H_DEFAULT; } styler.StartAt(startPos, static_cast<char>(STYLE_MAX)); @@ -536,12 +572,17 @@ static void ColouriseHyperTextDoc(unsigned int startPos, int length, int initSty int beforePreProc = (lineState >> 12) & 0xFF; // 8 bits of state script_type scriptLanguage = ScriptOfState(state); + // If eNonHtmlScript coincides with SCE_H_COMMENT, assume eScriptComment + if (inScriptType == eNonHtmlScript && state == SCE_H_COMMENT) { + scriptLanguage = eScriptComment; + } const bool foldHTML = styler.GetPropertyInt("fold.html", 0) != 0; const bool fold = foldHTML && styler.GetPropertyInt("fold", 0); const bool foldHTMLPreprocessor = foldHTML && styler.GetPropertyInt("fold.html.preprocessor", 1); const bool foldCompact = styler.GetPropertyInt("fold.compact", 1) != 0; const bool caseSensitive = styler.GetPropertyInt("html.tags.case.sensitive", 0) != 0; + const bool allowScripts = styler.GetPropertyInt("lexer.xml.allow.scripts", 1) != 0; const CharacterSet setHTMLWord(CharacterSet::setAlphaNum, ".-_:!#", 0x80, true); const 
CharacterSet setTagContinue(CharacterSet::setAlphaNum, ".-_:!#[", 0x80, true); @@ -686,19 +727,22 @@ static void ColouriseHyperTextDoc(unsigned int startPos, int length, int initSty case SCE_HP_STRING: case SCE_HP_TRIPLE: case SCE_HP_TRIPLEDOUBLE: + case SCE_HPHP_HSTRING: + case SCE_HPHP_SIMPLESTRING: + case SCE_HPHP_COMMENT: + case SCE_HPHP_COMMENTLINE: break; default : // check if the closing tag is a script tag - if (state == SCE_HJ_COMMENTLINE || isXml) { - char tag[7]; // room for the