From 396fa96f24ad479f0c9d69c91c5992a18a819057 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Enrico=20Tr=C3=B6ger?= Date: Sat, 18 Oct 2008 10:56:10 +0000 Subject: [PATCH] Update Scintilla to version 1.76. git-svn-id: https://geany.svn.sourceforge.net/svnroot/geany/trunk@3114 ea778897-0a13-0410-b9d1-a72fbfd435f5 --- ChangeLog | 2 + NEWS | 1 + scintilla/CellBuffer.cxx | 4 + scintilla/CellBuffer.h | 1 + scintilla/Document.cxx | 426 +++--- scintilla/Document.h | 25 +- scintilla/Editor.cxx | 35 +- scintilla/ExternalLexer.cxx | 6 +- scintilla/LexAsm.cxx | 10 +- scintilla/LexBash.cxx | 751 ++++------ scintilla/LexCPP.cxx | 2 +- scintilla/LexCSS.cxx | 144 +- scintilla/LexFortran.cxx | 13 +- scintilla/LexHTML.cxx | 22 +- scintilla/LexHaskell.cxx | 3 + scintilla/LexLua.cxx | 92 +- scintilla/LexOthers.cxx | 85 +- scintilla/LexPerl.cxx | 2145 ++++++++++++++--------------- scintilla/LexRuby.cxx | 3 +- scintilla/PlatGTK.cxx | 42 +- scintilla/RESearch.cxx | 24 +- scintilla/RESearch.h | 4 +- scintilla/RunStyles.h | 5 + scintilla/ScintillaGTK.cxx | 6 +- scintilla/SplitVector.h | 6 + scintilla/include/SciLexer.h | 27 + scintilla/include/Scintilla.h | 5 +- scintilla/include/Scintilla.iface | 46 +- src/plugindata.h | 2 +- 29 files changed, 1948 insertions(+), 1989 deletions(-) diff --git a/ChangeLog b/ChangeLog index a45d4470..8237e132 100644 --- a/ChangeLog +++ b/ChangeLog @@ -5,6 +5,8 @@ to get strings translated with GLib 2.18+. * data/filetypers.tcl: Use 'tclsh' as default Compile/Run command (part of #2037728). + * scintilla/*, scintilla/include/, src/plugindata.h: + Update Scintilla to version 1.76. 2008-10-17 Enrico Tröger diff --git a/NEWS b/NEWS index e16e42a8..d55232bb 100644 --- a/NEWS +++ b/NEWS @@ -31,6 +31,7 @@ Geany 0.15 (October 19, 2008) * Add a debug messages window to easily view debug messages/warnings. Editor: + * Update Scintilla to version 1.77 (includes many fixes). * Fix documents sometimes not being colourised properly after a reload (#1948857). 
* Add basic Line Breaking option in the Document menu and 'Line breaking diff --git a/scintilla/CellBuffer.cxx b/scintilla/CellBuffer.cxx index 85bf4355..0e9ae695 100644 --- a/scintilla/CellBuffer.cxx +++ b/scintilla/CellBuffer.cxx @@ -587,6 +587,10 @@ char CellBuffer::StyleAt(int position) { return style.ValueAt(position); } +const char *CellBuffer::BufferPointer() { + return substance.BufferPointer(); +} + // The char* returned is to an allocation owned by the undo history const char *CellBuffer::InsertString(int position, const char *s, int insertLength, bool &startSequence) { char *data = 0; diff --git a/scintilla/CellBuffer.h b/scintilla/CellBuffer.h index 4f654a8f..4b83f48e 100644 --- a/scintilla/CellBuffer.h +++ b/scintilla/CellBuffer.h @@ -171,6 +171,7 @@ public: char CharAt(int position) const; void GetCharRange(char *buffer, int position, int lengthRetrieve); char StyleAt(int position); + const char *BufferPointer(); int Length() const; void Allocate(int newSize); diff --git a/scintilla/Document.cxx b/scintilla/Document.cxx index ff8d0fbc..bded3a32 100644 --- a/scintilla/Document.cxx +++ b/scintilla/Document.cxx @@ -73,8 +73,7 @@ Document::Document() { lenWatchers = 0; matchesValid = false; - pre = 0; - substituted = 0; + regex = 0; } Document::~Document() { @@ -84,10 +83,8 @@ Document::~Document() { delete []watchers; watchers = 0; lenWatchers = 0; - delete pre; - pre = 0; - delete []substituted; - substituted = 0; + delete regex; + regex = 0; } // Increase reference count and return its previous value. 
@@ -173,7 +170,7 @@ int Document::LineEndPosition(int position) { int Document::VCHomePosition(int position) { int line = LineFromPosition(position); int startPosition = LineStart(line); - int endLine = LineStart(line + 1) - 1; + int endLine = LineEnd(line); int startText = startPosition; while (startText < endLine && (cb.CharAt(startText) == ' ' || cb.CharAt(startText) == '\t' ) ) startText++; @@ -1015,123 +1012,18 @@ static inline char MakeLowerCase(char ch) { return static_cast<char>(ch - 'A' + 'a'); } -// Define a way for the Regular Expression code to access the document -class DocumentIndexer : public CharacterIndexer { - Document *pdoc; - int end; -public: - DocumentIndexer(Document *pdoc_, int end_) : - pdoc(pdoc_), end(end_) { - } - - virtual ~DocumentIndexer() { - } - - virtual char CharAt(int index) { - if (index < 0 || index >= end) - return 0; - else - return pdoc->CharAt(index); - } -}; - /** * Find text in document, supporting both forward and backward * searches (just pass minPos > maxPos to do a backward search) * Has not been tested with backwards DBCS searches yet. */ long Document::FindText(int minPos, int maxPos, const char *s, - bool caseSensitive, bool word, bool wordStart, bool regExp, bool posix, + bool caseSensitive, bool word, bool wordStart, bool regExp, int flags, int *length) { if (regExp) { - if (!pre) - pre = new RESearch(&charClass); - if (!pre) - return -1; - - int increment = (minPos <= maxPos) ? 1 : -1; - - int startPos = minPos; - int endPos = maxPos; - - // Range endpoints should not be inside DBCS characters, but just in case, move them. - startPos = MovePositionOutsideChar(startPos, 1, false); - endPos = MovePositionOutsideChar(endPos, 1, false); - - const char *errmsg = pre->Compile(s, *length, caseSensitive, posix); - if (errmsg) { - return -1; - } - // Find a variable in a property file: \$(\([A-Za-z0-9_.]+\)) - // Replace first '.' 
with '-' in each property file variable reference: - // Search: \$(\([A-Za-z0-9_-]+\)\.\([A-Za-z0-9_.]+\)) - // Replace: $(\1-\2) - int lineRangeStart = LineFromPosition(startPos); - int lineRangeEnd = LineFromPosition(endPos); - if ((increment == 1) && - (startPos >= LineEnd(lineRangeStart)) && - (lineRangeStart < lineRangeEnd)) { - // the start position is at end of line or between line end characters. - lineRangeStart++; - startPos = LineStart(lineRangeStart); - } - int pos = -1; - int lenRet = 0; - char searchEnd = s[*length - 1]; - int lineRangeBreak = lineRangeEnd + increment; - for (int line = lineRangeStart; line != lineRangeBreak; line += increment) { - int startOfLine = LineStart(line); - int endOfLine = LineEnd(line); - if (increment == 1) { - if (line == lineRangeStart) { - if ((startPos != startOfLine) && (s[0] == '^')) - continue; // Can't match start of line if start position after start of line - startOfLine = startPos; - } - if (line == lineRangeEnd) { - if ((endPos != endOfLine) && (searchEnd == '$')) - continue; // Can't match end of line if end position before end of line - endOfLine = endPos; - } - } else { - if (line == lineRangeEnd) { - if ((endPos != startOfLine) && (s[0] == '^')) - continue; // Can't match start of line if end position after start of line - startOfLine = endPos; - } - if (line == lineRangeStart) { - if ((startPos != endOfLine) && (searchEnd == '$')) - continue; // Can't match end of line if start position before end of line - endOfLine = startPos; - } - } - - DocumentIndexer di(this, endOfLine); - int success = pre->Execute(di, startOfLine, endOfLine); - if (success) { - pos = pre->bopat[0]; - lenRet = pre->eopat[0] - pre->bopat[0]; - if (increment == -1) { - // Check for the last match on this line. 
- int repetitions = 1000; // Break out of infinite loop - while (success && (pre->eopat[0] <= endOfLine) && (repetitions--)) { - success = pre->Execute(di, pos+1, endOfLine); - if (success) { - if (pre->eopat[0] <= minPos) { - pos = pre->bopat[0]; - lenRet = pre->eopat[0] - pre->bopat[0]; - } else { - success = 0; - } - } - } - } - break; - } - } - *length = lenRet; - return pos; - + if (!regex) + regex = CreateRegexSearch(&charClass); + return regex->FindText(this, minPos, maxPos, s, caseSensitive, word, wordStart, flags, length); } else { bool forward = minPos <= maxPos; @@ -1201,86 +1093,7 @@ long Document::FindText(int minPos, int maxPos, const char *s, } const char *Document::SubstituteByPosition(const char *text, int *length) { - if (!pre) - return 0; - delete []substituted; - substituted = 0; - DocumentIndexer di(this, Length()); - if (!pre->GrabMatches(di)) - return 0; - unsigned int lenResult = 0; - for (int i = 0; i < *length; i++) { - if (text[i] == '\\') { - if (text[i + 1] >= '1' && text[i + 1] <= '9') { - unsigned int patNum = text[i + 1] - '0'; - lenResult += pre->eopat[patNum] - pre->bopat[patNum]; - i++; - } else { - switch (text[i + 1]) { - case 'a': - case 'b': - case 'f': - case 'n': - case 'r': - case 't': - case 'v': - i++; - } - lenResult++; - } - } else { - lenResult++; - } - } - substituted = new char[lenResult + 1]; - if (!substituted) - return 0; - char *o = substituted; - for (int j = 0; j < *length; j++) { - if (text[j] == '\\') { - if (text[j + 1] >= '1' && text[j + 1] <= '9') { - unsigned int patNum = text[j + 1] - '0'; - unsigned int len = pre->eopat[patNum] - pre->bopat[patNum]; - if (pre->pat[patNum]) // Will be null if try for a match that did not occur - memcpy(o, pre->pat[patNum], len); - o += len; - j++; - } else { - j++; - switch (text[j]) { - case 'a': - *o++ = '\a'; - break; - case 'b': - *o++ = '\b'; - break; - case 'f': - *o++ = '\f'; - break; - case 'n': - *o++ = '\n'; - break; - case 'r': - *o++ = '\r'; - break; - case 
't': - *o++ = '\t'; - break; - case 'v': - *o++ = '\v'; - break; - default: - *o++ = '\\'; - j--; - } - } - } else { - *o++ = text[j]; - } - } - *o = '\0'; - *length = lenResult; - return substituted; + return regex ? regex->SubstituteByPosition(this, text, length) : 0; /* regex is created lazily by FindText and may still be null */ } int Document::LinesTotal() const { @@ -1380,7 +1193,7 @@ void Document::EnsureStyledTo(int pos) { } } -int Document::SetLineState(int line, int state) { +int Document::SetLineState(int line, int state) { int statePrevious = cb.SetLineState(line, state); if (state != statePrevious) { DocModification mh(SC_MOD_CHANGELINESTATE, 0, 0, 0, 0, line); @@ -1630,3 +1443,222 @@ int Document::BraceMatch(int position, int /*maxReStyle*/) { } return - 1; } + +/** + * Implementation of RegexSearchBase for the default built-in regular expression engine + */ +class BuiltinRegex : public RegexSearchBase { +public: + BuiltinRegex(CharClassify *charClassTable) : search(charClassTable), substituted(NULL) {} + + virtual ~BuiltinRegex() { + delete []substituted; /* buffer is allocated with new char[] in SubstituteByPosition */ + } + + virtual long FindText(Document *doc, int minPos, int maxPos, const char *s, + bool caseSensitive, bool word, bool wordStart, int flags, + int *length); + + virtual const char *SubstituteByPosition(Document* doc, const char *text, int *length); + +private: + RESearch search; + char *substituted; +}; + +// Define a way for the Regular Expression code to access the document +class DocumentIndexer : public CharacterIndexer { + Document *pdoc; + int end; +public: + DocumentIndexer(Document *pdoc_, int end_) : + pdoc(pdoc_), end(end_) { + } + + virtual ~DocumentIndexer() { + } + + virtual char CharAt(int index) { + if (index < 0 || index >= end) + return 0; + else + return pdoc->CharAt(index); + } +}; + +long BuiltinRegex::FindText(Document *doc, int minPos, int maxPos, const char *s, + bool caseSensitive, bool, bool, int flags, + int *length) { + bool posix = (flags & SCFIND_POSIX) != 0; + int increment = (minPos <= maxPos) ? 
1 : -1; + + int startPos = minPos; + int endPos = maxPos; + + // Range endpoints should not be inside DBCS characters, but just in case, move them. + startPos = doc->MovePositionOutsideChar(startPos, 1, false); + endPos = doc->MovePositionOutsideChar(endPos, 1, false); + + const char *errmsg = search.Compile(s, *length, caseSensitive, posix); + if (errmsg) { + return -1; + } + // Find a variable in a property file: \$(\([A-Za-z0-9_.]+\)) + // Replace first '.' with '-' in each property file variable reference: + // Search: \$(\([A-Za-z0-9_-]+\)\.\([A-Za-z0-9_.]+\)) + // Replace: $(\1-\2) + int lineRangeStart = doc->LineFromPosition(startPos); + int lineRangeEnd = doc->LineFromPosition(endPos); + if ((increment == 1) && + (startPos >= doc->LineEnd(lineRangeStart)) && + (lineRangeStart < lineRangeEnd)) { + // the start position is at end of line or between line end characters. + lineRangeStart++; + startPos = doc->LineStart(lineRangeStart); + } + int pos = -1; + int lenRet = 0; + char searchEnd = s[*length - 1]; + int lineRangeBreak = lineRangeEnd + increment; + for (int line = lineRangeStart; line != lineRangeBreak; line += increment) { + int startOfLine = doc->LineStart(line); + int endOfLine = doc->LineEnd(line); + if (increment == 1) { + if (line == lineRangeStart) { + if ((startPos != startOfLine) && (s[0] == '^')) + continue; // Can't match start of line if start position after start of line + startOfLine = startPos; + } + if (line == lineRangeEnd) { + if ((endPos != endOfLine) && (searchEnd == '$')) + continue; // Can't match end of line if end position before end of line + endOfLine = endPos; + } + } else { + if (line == lineRangeEnd) { + if ((endPos != startOfLine) && (s[0] == '^')) + continue; // Can't match start of line if end position after start of line + startOfLine = endPos; + } + if (line == lineRangeStart) { + if ((startPos != endOfLine) && (searchEnd == '$')) + continue; // Can't match end of line if start position before end of line + endOfLine = 
startPos; + } + } + + DocumentIndexer di(doc, endOfLine); + int success = search.Execute(di, startOfLine, endOfLine); + if (success) { + pos = search.bopat[0]; + lenRet = search.eopat[0] - search.bopat[0]; + if (increment == -1) { + // Check for the last match on this line. + int repetitions = 1000; // Break out of infinite loop + while (success && (search.eopat[0] <= endOfLine) && (repetitions--)) { + success = search.Execute(di, pos+1, endOfLine); + if (success) { + if (search.eopat[0] <= minPos) { + pos = search.bopat[0]; + lenRet = search.eopat[0] - search.bopat[0]; + } else { + success = 0; + } + } + } + } + break; + } + } + *length = lenRet; + return pos; +} + +const char *BuiltinRegex::SubstituteByPosition(Document* doc, const char *text, int *length) { + delete []substituted; + substituted = 0; + DocumentIndexer di(doc, doc->Length()); + if (!search.GrabMatches(di)) + return 0; + unsigned int lenResult = 0; + for (int i = 0; i < *length; i++) { + if (text[i] == '\\') { + if (text[i + 1] >= '1' && text[i + 1] <= '9') { + unsigned int patNum = text[i + 1] - '0'; + lenResult += search.eopat[patNum] - search.bopat[patNum]; + i++; + } else { + switch (text[i + 1]) { + case 'a': + case 'b': + case 'f': + case 'n': + case 'r': + case 't': + case 'v': + i++; + } + lenResult++; + } + } else { + lenResult++; + } + } + substituted = new char[lenResult + 1]; + if (!substituted) + return 0; + char *o = substituted; + for (int j = 0; j < *length; j++) { + if (text[j] == '\\') { + if (text[j + 1] >= '1' && text[j + 1] <= '9') { + unsigned int patNum = text[j + 1] - '0'; + unsigned int len = search.eopat[patNum] - search.bopat[patNum]; + if (search.pat[patNum]) // Will be null if try for a match that did not occur + memcpy(o, search.pat[patNum], len); + o += len; + j++; + } else { + j++; + switch (text[j]) { + case 'a': + *o++ = '\a'; + break; + case 'b': + *o++ = '\b'; + break; + case 'f': + *o++ = '\f'; + break; + case 'n': + *o++ = '\n'; + break; + case 'r': + *o++ = 
'\r'; + break; + case 't': + *o++ = '\t'; + break; + case 'v': + *o++ = '\v'; + break; + default: + *o++ = '\\'; + j--; + } + } + } else { + *o++ = text[j]; + } + } + *o = '\0'; + *length = lenResult; + return substituted; +} + +#ifndef SCI_OWNREGEX + +RegexSearchBase *CreateRegexSearch(CharClassify *charClassTable) { + return new BuiltinRegex(charClassTable); +} + +#endif diff --git a/scintilla/Document.h b/scintilla/Document.h index a36c4aaf..0457b475 100644 --- a/scintilla/Document.h +++ b/scintilla/Document.h @@ -74,7 +74,24 @@ public: class DocWatcher; class DocModification; -class RESearch; +class Document; + +/** + * Interface class for regular expression searching + */ +class RegexSearchBase { +public: + virtual ~RegexSearchBase(){} + + virtual long FindText(Document* doc, int minPos, int maxPos, const char *s, + bool caseSensitive, bool word, bool wordStart, int flags, int *length) = 0; + + ///@return String with the substitutions, must remain valid until the next call or destruction + virtual const char *SubstituteByPosition(Document* doc, const char *text, int *length) = 0; +}; + +/// Factory function for RegexSearchBase +extern RegexSearchBase* CreateRegexSearch(CharClassify *charClassTable); /** */ @@ -109,8 +126,7 @@ private: int lenWatchers; bool matchesValid; - RESearch *pre; - char *substituted; + RegexSearchBase* regex; public: int stylingBits; @@ -159,6 +175,7 @@ public: void EndUndoAction() { cb.EndUndoAction(); } void SetSavePoint(); bool IsSavePoint() { return cb.IsSavePoint(); } + const char *BufferPointer() { return cb.BufferPointer(); } int GetLineIndentation(int line); void SetLineIndentation(int line, int indent); @@ -207,7 +224,7 @@ public: int Length() const { return cb.Length(); } void Allocate(int newSize) { cb.Allocate(newSize); } long FindText(int minPos, int maxPos, const char *s, - bool caseSensitive, bool word, bool wordStart, bool regExp, bool posix, int *length); + bool caseSensitive, bool word, bool wordStart, bool regExp, int 
flags, int *length); long FindText(int iMessage, unsigned long wParam, long lParam); const char *SubstituteByPosition(const char *text, int *length); int LinesTotal() const; diff --git a/scintilla/Editor.cxx b/scintilla/Editor.cxx index 453a37b8..9c451ce0 100644 --- a/scintilla/Editor.cxx +++ b/scintilla/Editor.cxx @@ -2099,13 +2099,6 @@ void Editor::DrawEOL(Surface *surface, ViewStyle &vsDraw, PRectangle rcLine, Lin rcSegment.left = xEol + vsDraw.aveCharWidth + xStart; rcSegment.right = rcLine.right; - if (overrideBackground) { - surface->FillRectangle(rcSegment, background); - } else if (vsDraw.styles[ll->styles[ll->numCharsInLine] & styleMask].eolFilled) { - surface->FillRectangle(rcSegment, vsDraw.styles[ll->styles[ll->numCharsInLine] & styleMask].back.allocated); - } else { - surface->FillRectangle(rcSegment, vsDraw.styles[STYLE_DEFAULT].back.allocated); - } if (vsDraw.selEOLFilled && eolInSelection && vsDraw.selbackset && (line < pdoc->LinesTotal() - 1) && (vsDraw.selAlpha == SC_ALPHA_NOALPHA)) { surface->FillRectangle(rcSegment, SelectionBackground(vsDraw)); @@ -2573,7 +2566,7 @@ void Editor::DrawLine(Surface *surface, ViewStyle &vsDraw, int line, int lineVis // Find the most recent line with some text int lineLastWithText = line; - while (lineLastWithText > 0 && pdoc->IsWhiteLine(lineLastWithText)) { + while (lineLastWithText > Platform::Maximum(line-20, 0) && pdoc->IsWhiteLine(lineLastWithText)) { lineLastWithText--; } if (lineLastWithText < line) { @@ -2595,7 +2588,7 @@ void Editor::DrawLine(Surface *surface, ViewStyle &vsDraw, int line, int lineVis } int lineNextWithText = line; - while (lineNextWithText < pdoc->LinesTotal() && pdoc->IsWhiteLine(lineNextWithText)) { + while (lineNextWithText < Platform::Minimum(line+20, pdoc->LinesTotal()) && pdoc->IsWhiteLine(lineNextWithText)) { lineNextWithText++; } if (lineNextWithText > line) { @@ -3529,6 +3522,16 @@ void Editor::ClearAll() { } void Editor::ClearDocumentStyle() { + Decoration *deco = 
pdoc->decorations.root; + while (deco) { + // Save next in case deco deleted + Decoration *decoNext = deco->next; + if (deco->indicator < INDIC_CONTAINER) { + pdoc->decorations.SetCurrentIndicator(deco->indicator); + pdoc->DecorationFillRange(0, 0, pdoc->Length()); + } + deco = decoNext; + } pdoc->StartStyling(0, '\377'); pdoc->SetStyleFor(pdoc->Length(), 0); cs.ShowAll(); @@ -4895,7 +4898,7 @@ long Editor::FindText( (wParam & SCFIND_WHOLEWORD) != 0, (wParam & SCFIND_WORDSTART) != 0, (wParam & SCFIND_REGEXP) != 0, - (wParam & SCFIND_POSIX) != 0, + wParam, &lengthFound); if (pos != -1) { ft->chrgText.cpMin = pos; @@ -4939,7 +4942,7 @@ long Editor::SearchText( (wParam & SCFIND_WHOLEWORD) != 0, (wParam & SCFIND_WORDSTART) != 0, (wParam & SCFIND_REGEXP) != 0, - (wParam & SCFIND_POSIX) != 0, + wParam, &lengthFound); } else { pos = pdoc->FindText(searchAnchor, 0, txt, @@ -4947,7 +4950,7 @@ long Editor::SearchText( (wParam & SCFIND_WHOLEWORD) != 0, (wParam & SCFIND_WORDSTART) != 0, (wParam & SCFIND_REGEXP) != 0, - (wParam & SCFIND_POSIX) != 0, + wParam, &lengthFound); } @@ -4969,7 +4972,7 @@ long Editor::SearchInTarget(const char *text, int length) { (searchFlags & SCFIND_WHOLEWORD) != 0, (searchFlags & SCFIND_WORDSTART) != 0, (searchFlags & SCFIND_REGEXP) != 0, - (searchFlags & SCFIND_POSIX) != 0, + searchFlags, &lengthFound); if (pos != -1) { targetStart = pos; @@ -7116,7 +7119,8 @@ sptr_t Editor::WndProc(unsigned int iMessage, uptr_t wParam, sptr_t lParam) { break; case SCI_HIDELINES: - cs.SetVisible(wParam, lParam, false); + if (wParam > 0) + cs.SetVisible(wParam, lParam, false); SetScrollBars(); Redraw(); break; @@ -7668,6 +7672,9 @@ sptr_t Editor::WndProc(unsigned int iMessage, uptr_t wParam, sptr_t lParam) { case SCI_GETPASTECONVERTENDINGS: return convertPastes ? 
1 : 0; + case SCI_GETCHARACTERPOINTER: + return reinterpret_cast<sptr_t>(pdoc->BufferPointer()); + default: return DefWndProc(iMessage, wParam, lParam); } diff --git a/scintilla/ExternalLexer.cxx b/scintilla/ExternalLexer.cxx index 0344debd..a4e29e31 100644 --- a/scintilla/ExternalLexer.cxx +++ b/scintilla/ExternalLexer.cxx @@ -172,13 +172,13 @@ LexerLibrary::~LexerLibrary() { void LexerLibrary::Release() { //TODO maintain a list of lexers created, and delete them! LexerMinder *lm; - LexerMinder *next; + LexerMinder *lmNext; lm = first; while (NULL != lm) { - next = lm->next; + lmNext = lm->next; delete lm->self; delete lm; - lm = next; + lm = lmNext; } first = NULL; diff --git a/scintilla/LexAsm.cxx b/scintilla/LexAsm.cxx index 9dd4df45..17c93842 100644 --- a/scintilla/LexAsm.cxx +++ b/scintilla/LexAsm.cxx @@ -37,8 +37,8 @@ static inline bool IsAWordStart(const int ch) { ch == '%' || ch == '@' || ch == '$' || ch == '?'); } -static inline bool IsAsmOperator(char ch) { - if (isalnum(ch)) +static inline bool IsAsmOperator(const int ch) { + if ((ch < 0x80) && (isalnum(ch))) return false; // '.' left out as it is used to make up numbers if (ch == '*' || ch == '/' || ch == '-' || ch == '+' || @@ -89,7 +89,7 @@ static void ColouriseAsmDoc(unsigned int startPos, int length, int initStyle, Wo // Determine if the current state should terminate. if (sc.state == SCE_ASM_OPERATOR) { - if (!IsAsmOperator(static_cast<char>(sc.ch))) { + if (!IsAsmOperator(sc.ch)) { sc.SetState(SCE_ASM_DEFAULT); } }else if (sc.state == SCE_ASM_NUMBER) { @@ -149,7 +149,7 @@ static void ColouriseAsmDoc(unsigned int startPos, int length, int initStyle, Wo if (sc.state == SCE_ASM_DEFAULT) { if (sc.ch == ';'){ sc.SetState(SCE_ASM_COMMENT); - } else if (isdigit(sc.ch) || (sc.ch == '.' && isdigit(sc.chNext))) { + } else if (isascii(sc.ch) && (isdigit(sc.ch) || (sc.ch == '.' 
&& isascii(sc.chNext) && isdigit(sc.chNext)))) { sc.SetState(SCE_ASM_NUMBER); } else if (IsAWordStart(sc.ch)) { sc.SetState(SCE_ASM_IDENTIFIER); @@ -157,7 +157,7 @@ static void ColouriseAsmDoc(unsigned int startPos, int length, int initStyle, Wo sc.SetState(SCE_ASM_STRING); } else if (sc.ch == '\'') { sc.SetState(SCE_ASM_CHARACTER); - } else if (IsAsmOperator(static_cast(sc.ch))) { + } else if (IsAsmOperator(sc.ch)) { sc.SetState(SCE_ASM_OPERATOR); } } diff --git a/scintilla/LexBash.cxx b/scintilla/LexBash.cxx index f0376b94..7b475a7d 100644 --- a/scintilla/LexBash.cxx +++ b/scintilla/LexBash.cxx @@ -2,8 +2,8 @@ /** @file LexBash.cxx ** Lexer for Bash. **/ -// Copyright 2004-2007 by Neil Hodgson -// Adapted from LexPerl by Kein-Hong Man 2004 +// Copyright 2004-2008 by Neil Hodgson +// Adapted from LexPerl by Kein-Hong Man 2004 // The License.txt file describes the conditions under which this software may be distributed. #include @@ -16,9 +16,17 @@ #include "PropSet.h" #include "Accessor.h" +#include "StyleContext.h" #include "KeyWords.h" #include "Scintilla.h" #include "SciLexer.h" +#include "CharacterSet.h" + +#ifdef SCI_NAMESPACE +using namespace Scintilla; +#endif + +#define HERE_DELIM_MAX 256 // define this if you want 'invalid octals' to be marked as errors // usually, this is not a good idea, permissive lexing is better @@ -32,13 +40,7 @@ #define BASH_BASE_OCTAL_ERROR 69 #endif -#define HERE_DELIM_MAX 256 - -#ifdef SCI_NAMESPACE -using namespace Scintilla; -#endif - -static inline int translateBashDigit(char ch) { +static inline int translateBashDigit(int ch) { if (ch >= '0' && ch <= '9') { return ch - '0'; } else if (ch >= 'a' && ch <= 'z') { @@ -53,407 +55,210 @@ static inline int translateBashDigit(char ch) { return BASH_BASE_ERROR; } -static inline bool isEOLChar(char ch) { - return (ch == '\r') || (ch == '\n'); -} - -static bool isSingleCharOp(char ch) { - char strCharSet[2]; - strCharSet[0] = ch; - strCharSet[1] = '\0'; - return (NULL != 
strstr("rwxoRWXOezsfdlpSbctugkTBMACahGLNn", strCharSet)); -} - -static inline bool isBashOperator(char ch) { - if (ch == '^' || ch == '&' || ch == '\\' || ch == '%' || - ch == '(' || ch == ')' || ch == '-' || ch == '+' || - ch == '=' || ch == '|' || ch == '{' || ch == '}' || - ch == '[' || ch == ']' || ch == ':' || ch == ';' || - ch == '>' || ch == ',' || ch == '/' || ch == '<' || - ch == '?' || ch == '!' || ch == '.' || ch == '~' || - ch == '@') - return true; - return false; -} - -static int classifyWordBash(unsigned int start, unsigned int end, WordList &keywords, Accessor &styler) { - char s[100]; - for (unsigned int i = 0; i < end - start + 1 && i < 30; i++) { - s[i] = styler[start + i]; - s[i + 1] = '\0'; - } - char chAttr = SCE_SH_IDENTIFIER; - if (keywords.InList(s)) - chAttr = SCE_SH_WORD; - styler.ColourTo(end, chAttr); - return chAttr; -} - -static inline int getBashNumberBase(unsigned int start, unsigned int end, Accessor &styler) { +static inline int getBashNumberBase(char *s) { + int i = 0; int base = 0; - for (unsigned int i = 0; i < end - start + 1 && i < 10; i++) { - base = base * 10 + (styler[start + i] - '0'); + while (*s) { + base = base * 10 + (*s++ - '0'); + i++; } - if (base > 64 || (end - start) > 1) { + if (base > 64 || i > 2) { return BASH_BASE_ERROR; } return base; } -static inline bool isEndVar(char ch) { - return !isalnum(ch) && ch != '$' && ch != '_'; -} - -static inline bool isNonQuote(char ch) { - return isalnum(ch) || ch == '_'; -} - -static bool isMatch(Accessor &styler, int lengthDoc, int pos, const char *val) { - if ((pos + static_cast(strlen(val))) >= lengthDoc) { - return false; - } - while (*val) { - if (*val != styler[pos++]) { - return false; - } - val++; - } - return true; -} - -static char opposite(char ch) { - if (ch == '(') - return ')'; - if (ch == '[') - return ']'; - if (ch == '{') - return '}'; - if (ch == '<') - return '>'; +static int opposite(int ch) { + if (ch == '(') return ')'; + if (ch == '[') return ']'; + if 
(ch == '{') return '}'; + if (ch == '<') return '>'; return ch; } static void ColouriseBashDoc(unsigned int startPos, int length, int initStyle, - WordList *keywordlists[], Accessor &styler) { - - // Lexer for bash often has to backtrack to start of current style to determine - // which characters are being used as quotes, how deeply nested is the - // start position and what the termination string is for here documents + WordList *keywordlists[], Accessor &styler) { WordList &keywords = *keywordlists[0]; - class HereDocCls { + CharacterSet setWordStart(CharacterSet::setAlpha, "_"); + // note that [+-] are often parts of identifiers in shell scripts + CharacterSet setWord(CharacterSet::setAlphaNum, "._+-"); + CharacterSet setBashOperator(CharacterSet::setNone, "^&\\%()-+=|{}[]:;>,*/(ch); + Delimiter[DelimiterLength] = '\0'; + } ~HereDocCls() { delete []Delimiter; } }; HereDocCls HereDoc; - class QuoteCls { + class QuoteCls { // Class to manage quote pairs (simplified vs LexPerl) public: - int Rep; - int Count; - char Up; - char Down; + int Count; + int Up, Down; QuoteCls() { - this->New(1); - } - void New(int r) { - Rep = r; Count = 0; Up = '\0'; Down = '\0'; } - void Open(char u) { + void Open(int u) { Count++; Up = u; Down = opposite(Up); } + void Start(int u) { + Count = 0; + Open(u); + } }; QuoteCls Quote; - int state = initStyle; int numBase = 0; - unsigned int lengthDoc = startPos + length; + int digit; + unsigned int endPos = startPos + length; - // If in a long distance lexical state, seek to the beginning to find quote characters - // Bash strings can be multi-line with embedded newlines, so backtrack. - // Bash numbers have additional state during lexing, so backtrack too. - if (state == SCE_SH_HERE_Q) { + // Backtrack to beginning of style if required... 
+ // If in a long distance lexical state, backtrack to find quote characters + if (initStyle == SCE_SH_HERE_Q) { while ((startPos > 1) && (styler.StyleAt(startPos) != SCE_SH_HERE_DELIM)) { startPos--; } startPos = styler.LineStart(styler.GetLine(startPos)); - state = styler.StyleAt(startPos - 1); + initStyle = styler.StyleAt(startPos - 1); } - if (state == SCE_SH_STRING - || state == SCE_SH_BACKTICKS - || state == SCE_SH_CHARACTER - || state == SCE_SH_NUMBER - || state == SCE_SH_IDENTIFIER - || state == SCE_SH_COMMENTLINE - ) { - while ((startPos > 1) && (styler.StyleAt(startPos - 1) == state)) { + // Bash strings can be multi-line with embedded newlines, so backtrack. + // Bash numbers have additional state during lexing, so backtrack too. + if (initStyle == SCE_SH_STRING + || initStyle == SCE_SH_BACKTICKS + || initStyle == SCE_SH_CHARACTER + || initStyle == SCE_SH_NUMBER + || initStyle == SCE_SH_IDENTIFIER + || initStyle == SCE_SH_COMMENTLINE) { + while ((startPos > 1) && (styler.StyleAt(startPos - 1) == initStyle)) { startPos--; } - state = SCE_SH_DEFAULT; + initStyle = SCE_SH_DEFAULT; } - styler.StartAt(startPos); - char chPrev = styler.SafeGetCharAt(startPos - 1); - if (startPos == 0) - chPrev = '\n'; - char chNext = styler[startPos]; - styler.StartSegment(startPos); + StyleContext sc(startPos, endPos - startPos, initStyle, styler); - for (unsigned int i = startPos; i < lengthDoc; i++) { - char ch = chNext; - // if the current character is not consumed due to the completion of an - // earlier style, lexing can be restarted via a simple goto - restartLexer: - chNext = styler.SafeGetCharAt(i + 1); - char chNext2 = styler.SafeGetCharAt(i + 2); + for (; sc.More(); sc.Forward()) { - if (styler.IsLeadByte(ch)) { - chNext = styler.SafeGetCharAt(i + 2); - chPrev = ' '; - i += 1; - continue; - } - - if ((chPrev == '\r' && ch == '\n')) { // skip on DOS/Windows - styler.ColourTo(i, state); - chPrev = ch; - continue; - } - - if (HereDoc.State == 1 && isEOLChar(ch)) { - // 
Begin of here-doc (the line after the here-doc delimiter): - // Lexically, the here-doc starts from the next line after the >>, but the - // first line of here-doc seem to follow the style of the last EOL sequence - HereDoc.State = 2; - if (HereDoc.Quoted) { - if (state == SCE_SH_HERE_DELIM) { - // Missing quote at end of string! We are stricter than bash. - // Colour here-doc anyway while marking this bit as an error. - state = SCE_SH_ERROR; - } - styler.ColourTo(i - 1, state); - // HereDoc.Quote always == '\'' - state = SCE_SH_HERE_Q; - } else { - styler.ColourTo(i - 1, state); - // always switch - state = SCE_SH_HERE_Q; - } - } - - if (state == SCE_SH_DEFAULT) { - if (ch == '\\') { // escaped character - if (i < lengthDoc - 1) - i++; - ch = chNext; - chNext = chNext2; - styler.ColourTo(i, SCE_SH_IDENTIFIER); - } else if (isdigit(ch)) { - state = SCE_SH_NUMBER; - numBase = BASH_BASE_DECIMAL; - if (ch == '0') { // hex,octal - if (chNext == 'x' || chNext == 'X') { - numBase = BASH_BASE_HEX; - i++; - ch = chNext; - chNext = chNext2; - } else if (isdigit(chNext)) { -#ifdef PEDANTIC_OCTAL - numBase = BASH_BASE_OCTAL; -#else - numBase = BASH_BASE_HEX; -#endif + // Determine if the current state should terminate. + switch (sc.state) { + case SCE_SH_OPERATOR: + sc.SetState(SCE_SH_DEFAULT); + break; + case SCE_SH_WORD: + // "." never used in Bash variable names but used in file names + if (!setWord.Contains(sc.ch)) { + char s[1000]; + sc.GetCurrent(s, sizeof(s)); + if (s[0] != '-' && // for file operators + !keywords.InList(s)) { + sc.ChangeState(SCE_SH_IDENTIFIER); } + sc.SetState(SCE_SH_DEFAULT); } - } else if (iswordstart(ch)) { - state = SCE_SH_WORD; - if (!iswordchar(chNext) && chNext != '+' && chNext != '-') { - // We need that if length of word == 1! - // This test is copied from the SCE_SH_WORD handler. 
- classifyWordBash(styler.GetStartSegment(), i, keywords, styler); - state = SCE_SH_DEFAULT; + break; + case SCE_SH_IDENTIFIER: + if (sc.chPrev == '\\') { // for escaped chars + sc.ForwardSetState(SCE_SH_DEFAULT); + } else if (!setWord.Contains(sc.ch)) { + sc.SetState(SCE_SH_DEFAULT); } - } else if (ch == '#') { - state = SCE_SH_COMMENTLINE; - } else if (ch == '\"') { - state = SCE_SH_STRING; - Quote.New(1); - Quote.Open(ch); - } else if (ch == '\'') { - state = SCE_SH_CHARACTER; - Quote.New(1); - Quote.Open(ch); - } else if (ch == '`') { - state = SCE_SH_BACKTICKS; - Quote.New(1); - Quote.Open(ch); - } else if (ch == '$') { - if (chNext == '{') { - state = SCE_SH_PARAM; - goto startQuote; - } else if (chNext == '\'') { - state = SCE_SH_CHARACTER; - goto startQuote; - } else if (chNext == '"') { - state = SCE_SH_STRING; - goto startQuote; - } else if (chNext == '(' && chNext2 == '(') { - styler.ColourTo(i, SCE_SH_OPERATOR); - state = SCE_SH_DEFAULT; - goto skipChar; - } else if (chNext == '(' || chNext == '`') { - state = SCE_SH_BACKTICKS; - startQuote: - Quote.New(1); - Quote.Open(chNext); - goto skipChar; - } else { - state = SCE_SH_SCALAR; - skipChar: - i++; - ch = chNext; - chNext = chNext2; - } - } else if (ch == '*') { - if (chNext == '*') { // exponentiation - i++; - ch = chNext; - chNext = chNext2; - } - styler.ColourTo(i, SCE_SH_OPERATOR); - } else if (ch == '<' && chNext == '<') { - state = SCE_SH_HERE_DELIM; - HereDoc.State = 0; - HereDoc.Indent = false; - } else if (ch == '-' // file test operators - && isSingleCharOp(chNext) - && !isalnum((chNext2 = styler.SafeGetCharAt(i+2))) - && isspace(chPrev)) { - styler.ColourTo(i + 1, SCE_SH_WORD); - state = SCE_SH_DEFAULT; - i++; - ch = chNext; - chNext = chNext2; - } else if (isBashOperator(ch)) { - styler.ColourTo(i, SCE_SH_OPERATOR); - } else { - // keep colouring defaults to make restart easier - styler.ColourTo(i, SCE_SH_DEFAULT); - } - } else if (state == SCE_SH_NUMBER) { - int digit = 
translateBashDigit(ch); - if (numBase == BASH_BASE_DECIMAL) { - if (ch == '#') { - numBase = getBashNumberBase(styler.GetStartSegment(), i - 1, styler); - if (numBase == BASH_BASE_ERROR) // take the rest as comment - goto numAtEnd; - } else if (!isdigit(ch)) - goto numAtEnd; - } else if (numBase == BASH_BASE_HEX) { - if ((digit < 16) || (digit >= 36 && digit <= 41)) { - // hex digit 0-9a-fA-F - } else - goto numAtEnd; + break; + case SCE_SH_NUMBER: + digit = translateBashDigit(sc.ch); + if (numBase == BASH_BASE_DECIMAL) { + if (sc.ch == '#') { + char s[10]; + sc.GetCurrent(s, sizeof(s)); + numBase = getBashNumberBase(s); + if (numBase != BASH_BASE_ERROR) + break; + } else if (IsADigit(sc.ch)) + break; + } else if (numBase == BASH_BASE_HEX) { + if (IsADigit(sc.ch, 16)) + break; #ifdef PEDANTIC_OCTAL - } else if (numBase == BASH_BASE_OCTAL || - numBase == BASH_BASE_OCTAL_ERROR) { - if (digit > 7) { + } else if (numBase == BASH_BASE_OCTAL || + numBase == BASH_BASE_OCTAL_ERROR) { + if (digit <= 7) + break; if (digit <= 9) { - numBase = BASH_BASE_OCTAL_ERROR; - } else - goto numAtEnd; - } -#endif - } else if (numBase == BASH_BASE_ERROR) { - if (digit > 9) - goto numAtEnd; - } else { // DD#DDDD number style handling - if (digit != BASH_BASE_ERROR) { - if (numBase <= 36) { - // case-insensitive if base<=36 - if (digit >= 36) digit -= 26; + numBase = BASH_BASE_OCTAL_ERROR; + break; } - if (digit >= numBase) { +#endif + } else if (numBase == BASH_BASE_ERROR) { + if (digit <= 9) + break; + } else { // DD#DDDD number style handling + if (digit != BASH_BASE_ERROR) { + if (numBase <= 36) { + // case-insensitive if base<=36 + if (digit >= 36) digit -= 26; + } + if (digit < numBase) + break; if (digit <= 9) { numBase = BASH_BASE_ERROR; - } else - goto numAtEnd; + break; + } } - } else { - numAtEnd: - if (numBase == BASH_BASE_ERROR + } + // fallthrough when number is at an end or error + if (numBase == BASH_BASE_ERROR #ifdef PEDANTIC_OCTAL - || numBase == BASH_BASE_OCTAL_ERROR + 
|| numBase == BASH_BASE_OCTAL_ERROR #endif - ) - state = SCE_SH_ERROR; - styler.ColourTo(i - 1, state); - state = SCE_SH_DEFAULT; - goto restartLexer; + ) { + sc.ChangeState(SCE_SH_ERROR); } - } - } else if (state == SCE_SH_WORD) { - if (!iswordchar(chNext) && chNext != '+' && chNext != '-') { - // "." never used in Bash variable names - // but used in file names - classifyWordBash(styler.GetStartSegment(), i, keywords, styler); - state = SCE_SH_DEFAULT; - ch = ' '; - } - } else if (state == SCE_SH_IDENTIFIER) { - if (!iswordchar(chNext) && chNext != '+' && chNext != '-') { - styler.ColourTo(i, SCE_SH_IDENTIFIER); - state = SCE_SH_DEFAULT; - ch = ' '; - } - } else { - if (state == SCE_SH_COMMENTLINE) { - if (ch == '\\' && isEOLChar(chNext)) { + sc.SetState(SCE_SH_DEFAULT); + break; + case SCE_SH_COMMENTLINE: + if (sc.ch == '\\' && (sc.chNext == '\r' || sc.chNext == '\n')) { // comment continuation - if (chNext == '\r' && chNext2 == '\n') { - i += 2; - ch = styler.SafeGetCharAt(i); - chNext = styler.SafeGetCharAt(i + 1); - } else { - i++; - ch = chNext; - chNext = chNext2; + sc.Forward(); + if (sc.ch == '\r' && sc.chNext == '\n') { + sc.Forward(); } - } else if (isEOLChar(ch)) { - styler.ColourTo(i - 1, state); - state = SCE_SH_DEFAULT; - goto restartLexer; - } else if (isEOLChar(chNext)) { - styler.ColourTo(i, state); - state = SCE_SH_DEFAULT; + } else if (sc.atLineEnd) { + sc.ForwardSetState(SCE_SH_DEFAULT); } - } else if (state == SCE_SH_HERE_DELIM) { - // + break; + case SCE_SH_HERE_DELIM: // From Bash info: // --------------- // Specifier format is: <<[-]WORD @@ -461,150 +266,194 @@ static void ColouriseBashDoc(unsigned int startPos, int length, int initStyle, // Whitespace acceptable after <<[-] operator // if (HereDoc.State == 0) { // '<<' encountered - HereDoc.State = 1; - HereDoc.Quote = chNext; + HereDoc.Quote = sc.chNext; HereDoc.Quoted = false; HereDoc.DelimiterLength = 0; HereDoc.Delimiter[HereDoc.DelimiterLength] = '\0'; - if (chNext == '\'' || chNext 
== '\"') { // a quoted here-doc delimiter (' or ") - i++; - ch = chNext; - chNext = chNext2; + if (sc.chNext == '\'' || sc.chNext == '\"') { // a quoted here-doc delimiter (' or ") + sc.Forward(); HereDoc.Quoted = true; - } else if (!HereDoc.Indent && chNext == '-') { // <<- indent case + HereDoc.State = 1; + } else if (!HereDoc.Indent && sc.chNext == '-') { // <<- indent case HereDoc.Indent = true; - HereDoc.State = 0; - } else if (isalpha(chNext) || chNext == '_' || chNext == '\\' - || chNext == '-' || chNext == '+' || chNext == '!') { + } else if (setHereDoc.Contains(sc.chNext)) { // an unquoted here-doc delimiter, no special handling - // TODO check what exactly bash considers part of the delim - } else if (chNext == '<') { // HERE string <<< - i++; - ch = chNext; - chNext = chNext2; - styler.ColourTo(i, SCE_SH_HERE_DELIM); - state = SCE_SH_DEFAULT; - HereDoc.State = 0; - } else if (isspacechar(chNext)) { + // TODO check what exactly bash considers part of the delim + HereDoc.State = 1; + } else if (sc.chNext == '<') { // HERE string <<< + sc.Forward(); + sc.ForwardSetState(SCE_SH_DEFAULT); + } else if (IsASpace(sc.chNext)) { // eat whitespace - HereDoc.State = 0; - } else if (isdigit(chNext) || chNext == '=' || chNext == '$') { + } else if (setLeftShift.Contains(sc.chNext)) { // left shift << or <<= operator cases - styler.ColourTo(i, SCE_SH_OPERATOR); - state = SCE_SH_DEFAULT; - HereDoc.State = 0; + sc.ChangeState(SCE_SH_OPERATOR); + sc.ForwardSetState(SCE_SH_DEFAULT); } else { // symbols terminates; deprecated zero-length delimiter + HereDoc.State = 1; } } else if (HereDoc.State == 1) { // collect the delimiter if (HereDoc.Quoted) { // a quoted here-doc delimiter - if (ch == HereDoc.Quote) { // closing quote => end of delimiter - styler.ColourTo(i, state); - state = SCE_SH_DEFAULT; + if (sc.ch == HereDoc.Quote) { // closing quote => end of delimiter + sc.ForwardSetState(SCE_SH_DEFAULT); } else { - if (ch == '\\' && chNext == HereDoc.Quote) { // escaped quote 
- i++; - ch = chNext; - chNext = chNext2; + if (sc.ch == '\\' && sc.chNext == HereDoc.Quote) { // escaped quote + sc.Forward(); } - HereDoc.Delimiter[HereDoc.DelimiterLength++] = ch; - HereDoc.Delimiter[HereDoc.DelimiterLength] = '\0'; + HereDoc.Append(sc.ch); } } else { // an unquoted here-doc delimiter - if (isalnum(ch) || ch == '_' || ch == '-' || ch == '+' || ch == '!') { - HereDoc.Delimiter[HereDoc.DelimiterLength++] = ch; - HereDoc.Delimiter[HereDoc.DelimiterLength] = '\0'; - } else if (ch == '\\') { + if (setHereDoc2.Contains(sc.ch)) { + HereDoc.Append(sc.ch); + } else if (sc.ch == '\\') { // skip escape prefix } else { - styler.ColourTo(i - 1, state); - state = SCE_SH_DEFAULT; - goto restartLexer; + sc.SetState(SCE_SH_DEFAULT); } } - if (HereDoc.DelimiterLength >= HERE_DELIM_MAX - 1) { - styler.ColourTo(i - 1, state); - state = SCE_SH_ERROR; - goto restartLexer; + if (HereDoc.DelimiterLength >= HERE_DELIM_MAX - 1) { // force blowup + sc.SetState(SCE_SH_ERROR); + HereDoc.State = 0; } } - } else if (HereDoc.State == 2) { - // state == SCE_SH_HERE_Q - if (isMatch(styler, lengthDoc, i, HereDoc.Delimiter)) { - if (!HereDoc.Indent && isEOLChar(chPrev)) { - endHereDoc: - // standard HERE delimiter - i += HereDoc.DelimiterLength; - chPrev = styler.SafeGetCharAt(i - 1); - ch = styler.SafeGetCharAt(i); - if (isEOLChar(ch)) { - styler.ColourTo(i - 1, state); - state = SCE_SH_DEFAULT; - HereDoc.State = 0; - goto restartLexer; - } - chNext = styler.SafeGetCharAt(i + 1); - } else if (HereDoc.Indent) { - // indented HERE delimiter - unsigned int bk = (i > 0)? 
i - 1: 0; - while (i > 0) { - ch = styler.SafeGetCharAt(bk--); - if (isEOLChar(ch)) { - goto endHereDoc; - } else if (!isspacechar(ch)) { - break; // got leading non-whitespace - } + break; + case SCE_SH_HERE_Q: + // HereDoc.State == 2 + if (sc.atLineStart) { + sc.SetState(SCE_SH_HERE_Q); + int prefixws = 0; + while (IsASpace(sc.ch) && !sc.atLineEnd) { // whitespace prefix + sc.Forward(); + prefixws++; + } + if (prefixws > 0) + sc.SetState(SCE_SH_HERE_Q); + while (!sc.atLineEnd) { + sc.Forward(); + } + char s[HERE_DELIM_MAX]; + sc.GetCurrent(s, sizeof(s)); + if (strcmp(HereDoc.Delimiter, s) == 0) { + if ((prefixws > 0 && HereDoc.Indent) || // indentation rule + (prefixws == 0 && !HereDoc.Indent)) { + sc.SetState(SCE_SH_DEFAULT); + break; } } } - } else if (state == SCE_SH_SCALAR) { // variable names - if (isEndVar(ch)) { - if ((state == SCE_SH_SCALAR) - && i == (styler.GetStartSegment() + 1)) { + break; + case SCE_SH_SCALAR: // variable names + if (!setParam.Contains(sc.ch)) { + if (sc.LengthCurrent() == 1) { // Special variable: $(, $_ etc. 
- styler.ColourTo(i, state); - state = SCE_SH_DEFAULT; + sc.ForwardSetState(SCE_SH_DEFAULT); } else { - styler.ColourTo(i - 1, state); - state = SCE_SH_DEFAULT; - goto restartLexer; + sc.SetState(SCE_SH_DEFAULT); } } - } else if (state == SCE_SH_STRING - || state == SCE_SH_CHARACTER - || state == SCE_SH_BACKTICKS - || state == SCE_SH_PARAM - ) { - if (!Quote.Down && !isspacechar(ch)) { - Quote.Open(ch); - } else if (ch == '\\' && Quote.Up != '\\') { - i++; - ch = chNext; - chNext = styler.SafeGetCharAt(i + 1); - } else if (ch == Quote.Down) { + break; + case SCE_SH_STRING: // delimited styles + case SCE_SH_CHARACTER: + case SCE_SH_BACKTICKS: + case SCE_SH_PARAM: + if (sc.ch == '\\' && Quote.Up != '\\') { + sc.Forward(); + } else if (sc.ch == Quote.Down) { Quote.Count--; if (Quote.Count == 0) { - Quote.Rep--; - if (Quote.Rep <= 0) { - styler.ColourTo(i, state); - state = SCE_SH_DEFAULT; - ch = ' '; - } - if (Quote.Up == Quote.Down) { - Quote.Count++; - } + sc.ForwardSetState(SCE_SH_DEFAULT); } - } else if (ch == Quote.Up) { + } else if (sc.ch == Quote.Up) { Quote.Count++; } + break; + } + + // Must check end of HereDoc state 1 before default state is handled + if (HereDoc.State == 1 && sc.atLineEnd) { + // Begin of here-doc (the line after the here-doc delimiter): + // Lexically, the here-doc starts from the next line after the >>, but the + // first line of here-doc seem to follow the style of the last EOL sequence + HereDoc.State = 2; + if (HereDoc.Quoted) { + if (sc.state == SCE_SH_HERE_DELIM) { + // Missing quote at end of string! We are stricter than bash. + // Colour here-doc anyway while marking this bit as an error. + sc.ChangeState(SCE_SH_ERROR); + } + // HereDoc.Quote always == '\'' + } + sc.SetState(SCE_SH_HERE_Q); + } + + // Determine if a new state should be entered. 
+ if (sc.state == SCE_SH_DEFAULT) { + if (sc.ch == '\\') { // escaped character + sc.SetState(SCE_SH_IDENTIFIER); + } else if (IsADigit(sc.ch)) { + sc.SetState(SCE_SH_NUMBER); + numBase = BASH_BASE_DECIMAL; + if (sc.ch == '0') { // hex,octal + if (sc.chNext == 'x' || sc.chNext == 'X') { + numBase = BASH_BASE_HEX; + sc.Forward(); + } else if (IsADigit(sc.chNext)) { +#ifdef PEDANTIC_OCTAL + numBase = BASH_BASE_OCTAL; +#else + numBase = BASH_BASE_HEX; +#endif + } + } + } else if (setWordStart.Contains(sc.ch)) { + sc.SetState(SCE_SH_WORD); + } else if (sc.ch == '#') { + sc.SetState(SCE_SH_COMMENTLINE); + } else if (sc.ch == '\"') { + sc.SetState(SCE_SH_STRING); + Quote.Start(sc.ch); + } else if (sc.ch == '\'') { + sc.SetState(SCE_SH_CHARACTER); + Quote.Start(sc.ch); + } else if (sc.ch == '`') { + sc.SetState(SCE_SH_BACKTICKS); + Quote.Start(sc.ch); + } else if (sc.ch == '$') { + sc.SetState(SCE_SH_SCALAR); + sc.Forward(); + if (sc.ch == '{') { + sc.ChangeState(SCE_SH_PARAM); + } else if (sc.ch == '\'') { + sc.ChangeState(SCE_SH_CHARACTER); + } else if (sc.ch == '"') { + sc.ChangeState(SCE_SH_STRING); + } else if (sc.ch == '(' || sc.ch == '`') { + sc.ChangeState(SCE_SH_BACKTICKS); + if (sc.chNext == '(') { // $(( is lexed as operator + sc.ChangeState(SCE_SH_OPERATOR); + } + } else { + continue; // scalar has no delimiter pair + } + // fallthrough, open delim for $[{'"(`] + Quote.Start(sc.ch); + } else if (sc.Match('<', '<')) { + sc.SetState(SCE_SH_HERE_DELIM); + HereDoc.State = 0; + HereDoc.Indent = false; + } else if (sc.ch == '-' && // one-char file test operators + setSingleCharOp.Contains(sc.chNext) && + !setWord.Contains(sc.GetRelative(2)) && + IsASpace(sc.chPrev)) { + sc.SetState(SCE_SH_WORD); + sc.Forward(); + } else if (setBashOperator.Contains(sc.ch)) { + sc.SetState(SCE_SH_OPERATOR); } } - if (state == SCE_SH_ERROR) { - break; - } - chPrev = ch; } - styler.ColourTo(lengthDoc - 1, state); + sc.Complete(); } static bool IsCommentLine(int line, Accessor &styler) 
{ @@ -621,7 +470,7 @@ static bool IsCommentLine(int line, Accessor &styler) { } static void FoldBashDoc(unsigned int startPos, int length, int, WordList *[], - Accessor &styler) { + Accessor &styler) { bool foldComment = styler.GetPropertyInt("fold.comment") != 0; bool foldCompact = styler.GetPropertyInt("fold.compact", 1) != 0; unsigned int endPos = startPos + length; @@ -637,16 +486,16 @@ static void FoldBashDoc(unsigned int startPos, int length, int, WordList *[], int style = styleNext; styleNext = styler.StyleAt(i + 1); bool atEOL = (ch == '\r' && chNext != '\n') || (ch == '\n'); - // Comment folding + // Comment folding if (foldComment && atEOL && IsCommentLine(lineCurrent, styler)) - { - if (!IsCommentLine(lineCurrent - 1, styler) - && IsCommentLine(lineCurrent + 1, styler)) - levelCurrent++; - else if (IsCommentLine(lineCurrent - 1, styler) - && !IsCommentLine(lineCurrent+1, styler)) - levelCurrent--; - } + { + if (!IsCommentLine(lineCurrent - 1, styler) + && IsCommentLine(lineCurrent + 1, styler)) + levelCurrent++; + else if (IsCommentLine(lineCurrent - 1, styler) + && !IsCommentLine(lineCurrent + 1, styler)) + levelCurrent--; + } if (style == SCE_SH_OPERATOR) { if (ch == '{') { levelCurrent++; diff --git a/scintilla/LexCPP.cxx b/scintilla/LexCPP.cxx index 49dad8b9..c2933f41 100644 --- a/scintilla/LexCPP.cxx +++ b/scintilla/LexCPP.cxx @@ -63,7 +63,7 @@ static void ColouriseCppDoc(unsigned int startPos, int length, int initStyle, Wo CharacterSet setOKBeforeRE(CharacterSet::setNone, "([{=,:;!%^&*|?~+-"); CharacterSet setCouldBePostOp(CharacterSet::setNone, "+-"); - CharacterSet setDoxygen(CharacterSet::setLower, "$@\\&<>#{}[]"); + CharacterSet setDoxygen(CharacterSet::setAlpha, "$@\\&<>#{}[]"); CharacterSet setWordStart(CharacterSet::setAlpha, "_", 0x80, true); CharacterSet setWord(CharacterSet::setAlphaNum, "._", 0x80, true); diff --git a/scintilla/LexCSS.cxx b/scintilla/LexCSS.cxx index f5c112d6..3b139cdc 100644 --- a/scintilla/LexCSS.cxx +++ 
b/scintilla/LexCSS.cxx @@ -28,11 +28,16 @@ using namespace Scintilla; static inline bool IsAWordChar(const unsigned int ch) { - return (isalnum(ch) || ch == '-' || ch == '_' || ch >= 161); // _ is not in fact correct CSS word-character + /* FIXME: + * The CSS spec allows "ISO 10646 characters U+00A1 and higher" to be treated as word chars. + * Unfortunately, we are only getting string bytes here, and not full unicode characters. We cannot guarantee + * that our byte is between U+0080 - U+00A0 (to return false), so we have to allow all characters U+0080 and higher + */ + return ch >= 0x80 || isalnum(ch) || ch == '-' || ch == '_'; } -inline bool IsCssOperator(const char ch) { - if (!isalnum(ch) && +inline bool IsCssOperator(const int ch) { + if (!((ch < 0x80) && isalnum(ch)) && (ch == '{' || ch == '}' || ch == ':' || ch == ',' || ch == ';' || ch == '.' || ch == '#' || ch == '!' || ch == '@' || /* CSS2 */ @@ -44,15 +49,21 @@ inline bool IsCssOperator(const char ch) { } static void ColouriseCssDoc(unsigned int startPos, int length, int initStyle, WordList *keywordlists[], Accessor &styler) { - WordList &keywords = *keywordlists[0]; + WordList &css1Props = *keywordlists[0]; WordList &pseudoClasses = *keywordlists[1]; - WordList &keywords2 = *keywordlists[2]; + WordList &css2Props = *keywordlists[2]; + WordList &css3Props = *keywordlists[3]; + WordList &pseudoElements = *keywordlists[4]; + WordList &exProps = *keywordlists[5]; + WordList &exPseudoClasses = *keywordlists[6]; + WordList &exPseudoElements = *keywordlists[7]; StyleContext sc(startPos, length, initStyle, styler); int lastState = -1; // before operator int lastStateC = -1; // before comment int op = ' '; // last operator + int opPrev = ' '; // last operator for (; sc.More(); sc.Forward()) { if (sc.state == SCE_CSS_COMMENT && sc.Match('*', '/')) { @@ -64,6 +75,7 @@ static void ColouriseCssDoc(unsigned int startPos, int length, int initStyle, Wo if ((lastStateC = styler.StyleAt(i-1)) != SCE_CSS_COMMENT) { if 
(lastStateC == SCE_CSS_OPERATOR) { op = styler.SafeGetCharAt(i-1); + opPrev = styler.SafeGetCharAt(i-2); while (--i) { lastState = styler.StyleAt(i-1); if (lastState != SCE_CSS_OPERATOR && lastState != SCE_CSS_COMMENT) @@ -100,6 +112,7 @@ static void ColouriseCssDoc(unsigned int startPos, int length, int initStyle, Wo if (op == ' ') { unsigned int i = startPos; op = styler.SafeGetCharAt(i-1); + opPrev = styler.SafeGetCharAt(i-2); while (--i) { lastState = styler.StyleAt(i-1); if (lastState != SCE_CSS_OPERATOR && lastState != SCE_CSS_COMMENT) @@ -111,19 +124,15 @@ static void ColouriseCssDoc(unsigned int startPos, int length, int initStyle, Wo if (lastState == SCE_CSS_DEFAULT) sc.SetState(SCE_CSS_DIRECTIVE); break; - case '*': - if (lastState == SCE_CSS_DEFAULT) - sc.SetState(SCE_CSS_TAG); - break; case '>': case '+': - if (lastState == SCE_CSS_TAG || lastState == SCE_CSS_PSEUDOCLASS || lastState == SCE_CSS_CLASS - || lastState == SCE_CSS_ID || lastState == SCE_CSS_UNKNOWN_PSEUDOCLASS) + if (lastState == SCE_CSS_TAG || lastState == SCE_CSS_CLASS || lastState == SCE_CSS_ID || + lastState == SCE_CSS_PSEUDOCLASS || lastState == SCE_CSS_EXTENDED_PSEUDOCLASS || lastState == SCE_CSS_UNKNOWN_PSEUDOCLASS) sc.SetState(SCE_CSS_DEFAULT); break; case '[': - if (lastState == SCE_CSS_TAG || lastState == SCE_CSS_PSEUDOCLASS || lastState == SCE_CSS_DEFAULT || - lastState == SCE_CSS_CLASS || lastState == SCE_CSS_ID || lastState == SCE_CSS_UNKNOWN_PSEUDOCLASS) + if (lastState == SCE_CSS_TAG || lastState == SCE_CSS_DEFAULT || lastState == SCE_CSS_CLASS || lastState == SCE_CSS_ID || + lastState == SCE_CSS_PSEUDOCLASS || lastState == SCE_CSS_EXTENDED_PSEUDOCLASS || lastState == SCE_CSS_UNKNOWN_PSEUDOCLASS) sc.SetState(SCE_CSS_ATTRIBUTE); break; case ']': @@ -138,27 +147,44 @@ static void ColouriseCssDoc(unsigned int startPos, int length, int initStyle, Wo break; case '}': if (lastState == SCE_CSS_DEFAULT || lastState == SCE_CSS_VALUE || lastState == SCE_CSS_IMPORTANT || - lastState == 
SCE_CSS_IDENTIFIER || lastState == SCE_CSS_IDENTIFIER2) + lastState == SCE_CSS_IDENTIFIER || lastState == SCE_CSS_IDENTIFIER2 || lastState == SCE_CSS_IDENTIFIER3) sc.SetState(SCE_CSS_DEFAULT); break; + case '(': + if (lastState == SCE_CSS_PSEUDOCLASS) + sc.SetState(SCE_CSS_TAG); + else if (lastState == SCE_CSS_EXTENDED_PSEUDOCLASS) + sc.SetState(SCE_CSS_EXTENDED_PSEUDOCLASS); + break; + case ')': + if (lastState == SCE_CSS_TAG || lastState == SCE_CSS_DEFAULT || lastState == SCE_CSS_CLASS || lastState == SCE_CSS_ID || + lastState == SCE_CSS_PSEUDOCLASS || lastState == SCE_CSS_EXTENDED_PSEUDOCLASS || lastState == SCE_CSS_UNKNOWN_PSEUDOCLASS || + lastState == SCE_CSS_PSEUDOELEMENT || lastState == SCE_CSS_EXTENDED_PSEUDOELEMENT) + sc.SetState(SCE_CSS_TAG); + break; case ':': - if (lastState == SCE_CSS_TAG || lastState == SCE_CSS_PSEUDOCLASS || lastState == SCE_CSS_DEFAULT || - lastState == SCE_CSS_CLASS || lastState == SCE_CSS_ID || lastState == SCE_CSS_UNKNOWN_PSEUDOCLASS) + if (lastState == SCE_CSS_TAG || lastState == SCE_CSS_DEFAULT || lastState == SCE_CSS_CLASS || lastState == SCE_CSS_ID || + lastState == SCE_CSS_PSEUDOCLASS || lastState == SCE_CSS_EXTENDED_PSEUDOCLASS || lastState == SCE_CSS_UNKNOWN_PSEUDOCLASS || + lastState == SCE_CSS_PSEUDOELEMENT || lastState == SCE_CSS_EXTENDED_PSEUDOELEMENT) sc.SetState(SCE_CSS_PSEUDOCLASS); - else if (lastState == SCE_CSS_IDENTIFIER || lastState == SCE_CSS_IDENTIFIER2 || lastState == SCE_CSS_UNKNOWN_IDENTIFIER) + else if (lastState == SCE_CSS_IDENTIFIER || lastState == SCE_CSS_IDENTIFIER2 || + lastState == SCE_CSS_IDENTIFIER3 || lastState == SCE_CSS_EXTENDED_IDENTIFIER || + lastState == SCE_CSS_UNKNOWN_IDENTIFIER) sc.SetState(SCE_CSS_VALUE); break; case '.': - if (lastState == SCE_CSS_TAG || lastState == SCE_CSS_PSEUDOCLASS || lastState == SCE_CSS_DEFAULT || - lastState == SCE_CSS_CLASS || lastState == SCE_CSS_ID || lastState == SCE_CSS_UNKNOWN_PSEUDOCLASS) + if (lastState == SCE_CSS_TAG || lastState == SCE_CSS_DEFAULT || 
lastState == SCE_CSS_CLASS || lastState == SCE_CSS_ID || + lastState == SCE_CSS_PSEUDOCLASS || lastState == SCE_CSS_EXTENDED_PSEUDOCLASS || lastState == SCE_CSS_UNKNOWN_PSEUDOCLASS) sc.SetState(SCE_CSS_CLASS); break; case '#': - if (lastState == SCE_CSS_TAG || lastState == SCE_CSS_PSEUDOCLASS || lastState == SCE_CSS_DEFAULT || - lastState == SCE_CSS_CLASS || lastState == SCE_CSS_ID || lastState == SCE_CSS_UNKNOWN_PSEUDOCLASS) + if (lastState == SCE_CSS_TAG || lastState == SCE_CSS_DEFAULT || lastState == SCE_CSS_CLASS || lastState == SCE_CSS_ID || + lastState == SCE_CSS_PSEUDOCLASS || lastState == SCE_CSS_EXTENDED_PSEUDOCLASS || lastState == SCE_CSS_UNKNOWN_PSEUDOCLASS) sc.SetState(SCE_CSS_ID); break; case ',': + case '|': + case '~': if (lastState == SCE_CSS_TAG) sc.SetState(SCE_CSS_DEFAULT); break; @@ -181,11 +207,19 @@ static void ColouriseCssDoc(unsigned int startPos, int length, int initStyle, Wo continue; } + if (sc.ch == '*' && sc.state == SCE_CSS_DEFAULT) { + sc.SetState(SCE_CSS_TAG); + continue; + } + if (IsAWordChar(sc.chPrev) && ( - sc.state == SCE_CSS_IDENTIFIER || sc.state == SCE_CSS_IDENTIFIER2 - || sc.state == SCE_CSS_UNKNOWN_IDENTIFIER - || sc.state == SCE_CSS_PSEUDOCLASS || sc.state == SCE_CSS_UNKNOWN_PSEUDOCLASS - || sc.state == SCE_CSS_IMPORTANT + sc.state == SCE_CSS_IDENTIFIER || sc.state == SCE_CSS_IDENTIFIER2 || + sc.state == SCE_CSS_IDENTIFIER3 || sc.state == SCE_CSS_EXTENDED_IDENTIFIER || + sc.state == SCE_CSS_UNKNOWN_IDENTIFIER || + sc.state == SCE_CSS_PSEUDOCLASS || sc.state == SCE_CSS_PSEUDOELEMENT || + sc.state == SCE_CSS_EXTENDED_PSEUDOCLASS || sc.state == SCE_CSS_EXTENDED_PSEUDOELEMENT || + sc.state == SCE_CSS_UNKNOWN_PSEUDOCLASS || + sc.state == SCE_CSS_IMPORTANT )) { char s[100]; sc.GetCurrentLowered(s, sizeof(s)); @@ -194,27 +228,36 @@ static void ColouriseCssDoc(unsigned int startPos, int length, int initStyle, Wo s2++; switch (sc.state) { case SCE_CSS_IDENTIFIER: - if (!keywords.InList(s2)) { - if (keywords2.InList(s2)) { - 
sc.ChangeState(SCE_CSS_IDENTIFIER2); - } else { - sc.ChangeState(SCE_CSS_UNKNOWN_IDENTIFIER); - } - } - break; + case SCE_CSS_IDENTIFIER2: + case SCE_CSS_IDENTIFIER3: + case SCE_CSS_EXTENDED_IDENTIFIER: case SCE_CSS_UNKNOWN_IDENTIFIER: - if (keywords.InList(s2)) + if (css1Props.InList(s2)) sc.ChangeState(SCE_CSS_IDENTIFIER); - else if (keywords2.InList(s2)) + else if (css2Props.InList(s2)) sc.ChangeState(SCE_CSS_IDENTIFIER2); + else if (css3Props.InList(s2)) + sc.ChangeState(SCE_CSS_IDENTIFIER3); + else if (exProps.InList(s2)) + sc.ChangeState(SCE_CSS_EXTENDED_IDENTIFIER); + else + sc.ChangeState(SCE_CSS_UNKNOWN_IDENTIFIER); break; case SCE_CSS_PSEUDOCLASS: - if (!pseudoClasses.InList(s2)) - sc.ChangeState(SCE_CSS_UNKNOWN_PSEUDOCLASS); - break; + case SCE_CSS_PSEUDOELEMENT: + case SCE_CSS_EXTENDED_PSEUDOCLASS: + case SCE_CSS_EXTENDED_PSEUDOELEMENT: case SCE_CSS_UNKNOWN_PSEUDOCLASS: - if (pseudoClasses.InList(s2)) + if (op == ':' && opPrev != ':' && pseudoClasses.InList(s2)) sc.ChangeState(SCE_CSS_PSEUDOCLASS); + else if (opPrev == ':' && pseudoElements.InList(s2)) + sc.ChangeState(SCE_CSS_PSEUDOELEMENT); + else if ((op == ':' || (op == '(' && lastState == SCE_CSS_EXTENDED_PSEUDOCLASS)) && opPrev != ':' && exPseudoClasses.InList(s2)) + sc.ChangeState(SCE_CSS_EXTENDED_PSEUDOCLASS); + else if (opPrev == ':' && exPseudoElements.InList(s2)) + sc.ChangeState(SCE_CSS_EXTENDED_PSEUDOELEMENT); + else + sc.ChangeState(SCE_CSS_UNKNOWN_PSEUDOCLASS); break; case SCE_CSS_IMPORTANT: if (strcmp(s2, "important") != 0) @@ -223,7 +266,14 @@ static void ColouriseCssDoc(unsigned int startPos, int length, int initStyle, Wo } } - if (sc.ch != '.' && sc.ch != ':' && sc.ch != '#' && (sc.state == SCE_CSS_CLASS || sc.state == SCE_CSS_PSEUDOCLASS || sc.state == SCE_CSS_UNKNOWN_PSEUDOCLASS || sc.state == SCE_CSS_ID)) + if (sc.ch != '.' 
&& sc.ch != ':' && sc.ch != '#' && ( + sc.state == SCE_CSS_CLASS || sc.state == SCE_CSS_ID || + (sc.ch != '(' && sc.ch != ')' && ( /* This line of the condition makes it possible to extend pseudo-classes with parentheses */ + sc.state == SCE_CSS_PSEUDOCLASS || sc.state == SCE_CSS_PSEUDOELEMENT || + sc.state == SCE_CSS_EXTENDED_PSEUDOCLASS || sc.state == SCE_CSS_EXTENDED_PSEUDOELEMENT || + sc.state == SCE_CSS_UNKNOWN_PSEUDOCLASS + )) + )) sc.SetState(SCE_CSS_TAG); if (sc.Match('/', '*')) { @@ -232,7 +282,7 @@ static void ColouriseCssDoc(unsigned int startPos, int length, int initStyle, Wo sc.Forward(); } else if (sc.state == SCE_CSS_VALUE && (sc.ch == '\"' || sc.ch == '\'')) { sc.SetState((sc.ch == '\"' ? SCE_CSS_DOUBLESTRING : SCE_CSS_SINGLESTRING)); - } else if (IsCssOperator(static_cast(sc.ch)) + } else if (IsCssOperator(sc.ch) && (sc.state != SCE_CSS_ATTRIBUTE || sc.ch == ']') && (sc.state != SCE_CSS_VALUE || sc.ch == ';' || sc.ch == '}' || sc.ch == '!') && (sc.state != SCE_CSS_DIRECTIVE || sc.ch == ';' || sc.ch == '{') @@ -241,6 +291,7 @@ static void ColouriseCssDoc(unsigned int startPos, int length, int initStyle, Wo lastState = sc.state; sc.SetState(SCE_CSS_OPERATOR); op = sc.ch; + opPrev = sc.chPrev; } } @@ -298,9 +349,14 @@ static void FoldCSSDoc(unsigned int startPos, int length, int, WordList *[], Acc } static const char * const cssWordListDesc[] = { - "CSS1 Keywords", - "Pseudo classes", - "CSS2 Keywords", + "CSS1 Properties", + "Pseudo-classes", + "CSS2 Properties", + "CSS3 Properties", + "Pseudo-elements", + "Browser-Specific CSS Properties", + "Browser-Specific Pseudo-classes", + "Browser-Specific Pseudo-elements", 0 }; diff --git a/scintilla/LexFortran.cxx b/scintilla/LexFortran.cxx index c68c5b62..e66b37eb 100644 --- a/scintilla/LexFortran.cxx +++ b/scintilla/LexFortran.cxx @@ -84,7 +84,15 @@ static void ColouriseFortranDoc(unsigned int startPos, int length, int initStyle int toLineStart = sc.currentPos - posLineStart; if (isFixFormat && 
(toLineStart < 6 || toLineStart > 72)) { if (toLineStart == 0 && (tolower(sc.ch) == 'c' || sc.ch == '*') || sc.ch == '!') { - sc.SetState(SCE_F_COMMENT); + if (sc.MatchIgnoreCase("cdec$") || sc.MatchIgnoreCase("*dec$") || sc.MatchIgnoreCase("!dec$") || + sc.MatchIgnoreCase("cdir$") || sc.MatchIgnoreCase("*dir$") || sc.MatchIgnoreCase("!dir$") || + sc.MatchIgnoreCase("cms$") || sc.MatchIgnoreCase("*ms$") || sc.MatchIgnoreCase("!ms$") || + sc.chNext == '$') { + sc.SetState(SCE_F_PREPROCESSOR); + } else { + sc.SetState(SCE_F_COMMENT); + } + while (!sc.atLineEnd && sc.More()) sc.Forward(); // Until line end } else if (toLineStart > 72) { sc.SetState(SCE_F_COMMENT); @@ -198,7 +206,8 @@ static void ColouriseFortranDoc(unsigned int startPos, int length, int initStyle // Determine if a new state should be entered. if (sc.state == SCE_F_DEFAULT) { if (sc.ch == '!') { - if (sc.chNext == '$') { + if (sc.MatchIgnoreCase("!dec$") || sc.MatchIgnoreCase("!dir$") || + sc.MatchIgnoreCase("!ms$") || sc.chNext == '$') { sc.SetState(SCE_F_PREPROCESSOR); } else { sc.SetState(SCE_F_COMMENT); diff --git a/scintilla/LexHTML.cxx b/scintilla/LexHTML.cxx index c0a47d9e..6d16c53f 100644 --- a/scintilla/LexHTML.cxx +++ b/scintilla/LexHTML.cxx @@ -581,6 +581,8 @@ static void ColouriseHyperTextDoc(unsigned int startPos, int length, int initSty const bool fold = foldHTML && styler.GetPropertyInt("fold", 0); const bool foldHTMLPreprocessor = foldHTML && styler.GetPropertyInt("fold.html.preprocessor", 1); const bool foldCompact = styler.GetPropertyInt("fold.compact", 1) != 0; + const bool foldComment = fold && styler.GetPropertyInt("fold.hypertext.comment", 0) != 0; + const bool foldHeredoc = fold && styler.GetPropertyInt("fold.hypertext.heredoc", 0) != 0; const bool caseSensitive = styler.GetPropertyInt("html.tags.case.sensitive", 0) != 0; const bool allowScripts = styler.GetPropertyInt("lexer.xml.allow.scripts", 1) != 0; @@ -645,9 +647,11 @@ static void ColouriseHyperTextDoc(unsigned int 
startPos, int length, int initSty if ((state != SCE_HPHP_COMMENT) && (state != SCE_HPHP_COMMENTLINE) && (state != SCE_HJ_COMMENT) && (state != SCE_HJ_COMMENTLINE) && (state != SCE_HJ_COMMENTDOC) && (!isStringState(state))) { //Platform::DebugPrintf("state=%d, StateToPrint=%d, initStyle=%d\n", state, StateToPrint, initStyle); //if ((state == SCE_HPHP_OPERATOR) || (state == SCE_HPHP_DEFAULT) || (state == SCE_HJ_SYMBOLS) || (state == SCE_HJ_START) || (state == SCE_HJ_DEFAULT)) { - if ((ch == '{') || (ch == '}')) { - levelCurrent += (ch == '{') ? 1 : -1; + if ((ch == '{') || (ch == '}') || (foldComment && (ch == '/') && (chNext == '*'))) { + levelCurrent += ((ch == '{') || (ch == '/')) ? 1 : -1; } + } else if (((state == SCE_HPHP_COMMENT) || (state == SCE_HJ_COMMENT)) && foldComment && (ch == '*') && (chNext == '/')) { + levelCurrent--; } break; case eScriptPython: @@ -771,7 +775,7 @@ static void ColouriseHyperTextDoc(unsigned int startPos, int length, int initSty beforePreProc = state; i++; visibleChars++; - i += PrintScriptingIndicatorOffset(styler, styler.GetStartSegment() + 2, i + 10); + i += PrintScriptingIndicatorOffset(styler, styler.GetStartSegment() + 2, i + 6); if (scriptLanguage == eScriptXML) styler.ColourTo(i, SCE_H_XMLSTART); else @@ -1622,7 +1626,10 @@ static void ColouriseHyperTextDoc(unsigned int startPos, int length, int initSty } else if (styler.Match(i, "<<<")) { bool isSimpleString = false; i = FindPhpStringDelimiter(phpStringDelimiter, sizeof(phpStringDelimiter), i + 3, lengthDoc, styler, isSimpleString); - if (strlen(phpStringDelimiter)) state = (isSimpleString ? SCE_HPHP_SIMPLESTRING : SCE_HPHP_HSTRING); + if (strlen(phpStringDelimiter)) { + state = (isSimpleString ? 
SCE_HPHP_SIMPLESTRING : SCE_HPHP_HSTRING); + if (foldHeredoc) levelCurrent++; + } } else if (ch == '\'') { state = SCE_HPHP_SIMPLESTRING; strcpy(phpStringDelimiter, "\'"); @@ -1689,6 +1696,7 @@ static void ColouriseHyperTextDoc(unsigned int startPos, int length, int initSty i += (((i + psdLength) < lengthDoc) ? psdLength : lengthDoc) - 1; styler.ColourTo(i, StateToPrint); state = SCE_HPHP_DEFAULT; + if (foldHeredoc) levelCurrent--; } } } @@ -1711,6 +1719,7 @@ static void ColouriseHyperTextDoc(unsigned int startPos, int length, int initSty i += (((i + psdLength) < lengthDoc) ? psdLength : lengthDoc) - 1; styler.ColourTo(i, StateToPrint); state = SCE_HPHP_DEFAULT; + if (foldHeredoc) levelCurrent--; } } break; @@ -1747,7 +1756,10 @@ static void ColouriseHyperTextDoc(unsigned int startPos, int length, int initSty } else if (styler.Match(i, "<<<")) { bool isSimpleString = false; i = FindPhpStringDelimiter(phpStringDelimiter, sizeof(phpStringDelimiter), i + 3, lengthDoc, styler, isSimpleString); - if (strlen(phpStringDelimiter)) state = (isSimpleString ? SCE_HPHP_SIMPLESTRING : SCE_HPHP_HSTRING); + if (strlen(phpStringDelimiter)) { + state = (isSimpleString ? SCE_HPHP_SIMPLESTRING : SCE_HPHP_HSTRING); + if (foldHeredoc) levelCurrent++; + } } else if (ch == '\'') { state = SCE_HPHP_SIMPLESTRING; strcpy(phpStringDelimiter, "\'"); diff --git a/scintilla/LexHaskell.cxx b/scintilla/LexHaskell.cxx index 095675ee..3213bd52 100644 --- a/scintilla/LexHaskell.cxx +++ b/scintilla/LexHaskell.cxx @@ -179,6 +179,9 @@ static void ColorizeHaskellDoc(unsigned int startPos, int length, int initStyle, // Digit if (IsADigit(sc.ch) || (sc.ch == '.' 
&& IsADigit(sc.chNext))) { sc.SetState(SCE_HA_NUMBER); + if (sc.ch == '0' && (sc.chNext == 'X' || sc.chNext == 'x')) { // Match anything starting with "0x" or "0X", too + sc.Forward(1); + } } // Comment line else if (sc.Match("--")) { diff --git a/scintilla/LexLua.cxx b/scintilla/LexLua.cxx index 63114a97..a1e579f2 100644 --- a/scintilla/LexLua.cxx +++ b/scintilla/LexLua.cxx @@ -21,48 +21,12 @@ #include "KeyWords.h" #include "Scintilla.h" #include "SciLexer.h" +#include "CharacterSet.h" #ifdef SCI_NAMESPACE using namespace Scintilla; #endif -// Extended to accept accented characters -static inline bool IsAWordChar(int ch) { - return ch >= 0x80 || - (isalnum(ch) || ch == '.' || ch == '_'); -} - -static inline bool IsAWordStart(int ch) { - return ch >= 0x80 || - (isalpha(ch) || ch == '_'); -} - -static inline bool IsANumberChar(int ch) { - // Not exactly following number definition (several dots are seen as OK, etc.) - // but probably enough in most cases. - return (ch < 0x80) && - (isdigit(ch) || toupper(ch) == 'E' || - ch == '.' || ch == '-' || ch == '+' || - (ch >= 'a' && ch <= 'f') || (ch >= 'A' && ch <= 'F')); -} - -static inline bool IsLuaOperator(int ch) { - if (ch >= 0x80 || isalnum(ch)) { - return false; - } - // '.' left out as it is used to make up numbers - if (ch == '*' || ch == '/' || ch == '-' || ch == '+' || - ch == '(' || ch == ')' || ch == '=' || - ch == '{' || ch == '}' || ch == '~' || - ch == '[' || ch == ']' || ch == ';' || - ch == '<' || ch == '>' || ch == ',' || - ch == '.' || ch == '^' || ch == '%' || ch == ':' || - ch == '#') { - return true; - } - return false; -} - // Test for [=[ ... ]=] delimiters, returns 0 if it's only a [ or ], // return 1 for [[ or ]], returns >=2 for [=[ or ]=] and so on. // The maximum number of '=' characters allowed is 254. 
@@ -91,6 +55,15 @@ static void ColouriseLuaDoc( WordList &keywords7 = *keywordlists[6]; WordList &keywords8 = *keywordlists[7]; + // Accepts accented characters + CharacterSet setWordStart(CharacterSet::setAlpha, "_", 0x80, true); + CharacterSet setWord(CharacterSet::setAlphaNum, "._", 0x80, true); + // Not exactly following number definition (several dots are seen as OK, etc.) + // but probably enough in most cases. + CharacterSet setNumber(CharacterSet::setDigits, ".-+abcdefABCDEF"); + CharacterSet setLuaOperator(CharacterSet::setNone, "*/-+()={}~[];<>,.^%:#"); + CharacterSet setEscapeSkip(CharacterSet::setNone, "\"'\\"); + int currentLine = styler.GetLine(startPos); // Initialize long string [[ ... ]] or block comment --[[ ... ]] nesting level, // if we are inside such a string. Block comment was introduced in Lua 5.0, @@ -136,7 +109,7 @@ static void ColouriseLuaDoc( // Handle string line continuation if ((sc.state == SCE_LUA_STRING || sc.state == SCE_LUA_CHARACTER) && - sc.ch == '\\') { + sc.ch == '\\') { if (sc.chNext == '\n' || sc.chNext == '\r') { sc.Forward(); if (sc.ch == '\r' && sc.chNext == '\n') { @@ -151,14 +124,14 @@ static void ColouriseLuaDoc( sc.SetState(SCE_LUA_DEFAULT); } else if (sc.state == SCE_LUA_NUMBER) { // We stop the number definition on non-numerical non-dot non-eE non-sign non-hexdigit char - if (!IsANumberChar(sc.ch)) { + if (!setNumber.Contains(sc.ch)) { sc.SetState(SCE_LUA_DEFAULT); } else if (sc.ch == '-' || sc.ch == '+') { - if (sc.chPrev != 'E' && sc.chPrev != 'e') - sc.SetState(SCE_LUA_DEFAULT); - } + if (sc.chPrev != 'E' && sc.chPrev != 'e') + sc.SetState(SCE_LUA_DEFAULT); + } } else if (sc.state == SCE_LUA_IDENTIFIER) { - if (!IsAWordChar(sc.ch) || sc.Match('.', '.')) { + if (!setWord.Contains(sc.ch) || sc.Match('.', '.')) { char s[100]; sc.GetCurrent(s, sizeof(s)); if (keywords.InList(s)) { @@ -186,7 +159,7 @@ static void ColouriseLuaDoc( } } else if (sc.state == SCE_LUA_STRING) { if (sc.ch == '\\') { - if (sc.chNext == '\"' 
|| sc.chNext == '\'' || sc.chNext == '\\') { + if (setEscapeSkip.Contains(sc.chNext)) { sc.Forward(); } } else if (sc.ch == '\"') { @@ -197,7 +170,7 @@ static void ColouriseLuaDoc( } } else if (sc.state == SCE_LUA_CHARACTER) { if (sc.ch == '\\') { - if (sc.chNext == '\"' || sc.chNext == '\'' || sc.chNext == '\\') { + if (setEscapeSkip.Contains(sc.chNext)) { sc.Forward(); } } else if (sc.ch == '\'') { @@ -233,9 +206,9 @@ static void ColouriseLuaDoc( if (IsADigit(sc.ch) || (sc.ch == '.' && IsADigit(sc.chNext))) { sc.SetState(SCE_LUA_NUMBER); if (sc.ch == '0' && toupper(sc.chNext) == 'X') { - sc.Forward(1); + sc.Forward(); } - } else if (IsAWordStart(sc.ch)) { + } else if (setWordStart.Contains(sc.ch)) { sc.SetState(SCE_LUA_IDENTIFIER); } else if (sc.ch == '\"') { sc.SetState(SCE_LUA_STRING); @@ -265,11 +238,34 @@ static void ColouriseLuaDoc( } } else if (sc.atLineStart && sc.Match('$')) { sc.SetState(SCE_LUA_PREPROCESSOR); // Obsolete since Lua 4.0, but still in old code - } else if (IsLuaOperator(static_cast<char>(sc.ch))) { + } else if (setLuaOperator.Contains(sc.ch)) { sc.SetState(SCE_LUA_OPERATOR); } } } + + if (setWord.Contains(sc.chPrev)) { + char s[100]; + sc.GetCurrent(s, sizeof(s)); + if (keywords.InList(s)) { + sc.ChangeState(SCE_LUA_WORD); + } else if (keywords2.InList(s)) { + sc.ChangeState(SCE_LUA_WORD2); + } else if (keywords3.InList(s)) { + sc.ChangeState(SCE_LUA_WORD3); + } else if (keywords4.InList(s)) { + sc.ChangeState(SCE_LUA_WORD4); + } else if (keywords5.InList(s)) { + sc.ChangeState(SCE_LUA_WORD5); + } else if (keywords6.InList(s)) { + sc.ChangeState(SCE_LUA_WORD6); + } else if (keywords7.InList(s)) { + sc.ChangeState(SCE_LUA_WORD7); + } else if (keywords8.InList(s)) { + sc.ChangeState(SCE_LUA_WORD8); + } + } + sc.Complete(); } diff --git a/scintilla/LexOthers.cxx b/scintilla/LexOthers.cxx index 430e5415..f472c590 100644 --- a/scintilla/LexOthers.cxx +++ b/scintilla/LexOthers.cxx @@ -62,7 +62,6 @@ static void ColouriseBatchLine( Accessor &styler) { 
unsigned int offset = 0; // Line Buffer Offset - unsigned int enVarEnd; // Environment Variable End point unsigned int cmdLoc; // External Command / Program Location char wordBuffer[81]; // Word Buffer - large to catch long paths unsigned int wbl; // Word Buffer Length @@ -115,39 +114,6 @@ static void ColouriseBatchLine( if (lineBuffer[offset] == '@') { styler.ColourTo(startLine + offset, SCE_BAT_HIDE); offset++; - // Check for Argument (%n) or Environment Variable (%x...%) - } else if (lineBuffer[offset] == '%') { - enVarEnd = offset + 1; - // Search end of word for second % (can be a long path) - while ((enVarEnd < lengthLine) && - (!isspacechar(lineBuffer[enVarEnd])) && - (lineBuffer[enVarEnd] != '%') && - (!IsBOperator(lineBuffer[enVarEnd])) && - (!IsBSeparator(lineBuffer[enVarEnd]))) { - enVarEnd++; - } - // Check for Argument (%n) - if ((Is0To9(lineBuffer[offset + 1])) && - (lineBuffer[enVarEnd] != '%')) { - // Colorize Argument - styler.ColourTo(startLine + offset + 1, SCE_BAT_IDENTIFIER); - offset += 2; - // Check for External Command / Program - if (offset < lengthLine && !isspacechar(lineBuffer[offset])) { - cmdLoc = offset; - } - // Check for Environment Variable (%x...%) - } else if ((lineBuffer[offset + 1] != '%') && - (lineBuffer[enVarEnd] == '%')) { - offset = enVarEnd; - // Colorize Environment Variable - styler.ColourTo(startLine + offset, SCE_BAT_IDENTIFIER); - offset++; - // Check for External Command / Program - if (offset < lengthLine && !isspacechar(lineBuffer[offset])) { - cmdLoc = offset; - } - } } // Skip next spaces while ((offset < lengthLine) && (isspacechar(lineBuffer[offset]))) { @@ -354,8 +320,8 @@ static void ColouriseBatchLine( (!IsBSeparator(wordBuffer[wbo]))) { wbo++; } - // Check for Argument (%n) - if ((Is0To9(wordBuffer[1])) && + // Check for Argument (%n) or (%*) + if (((Is0To9(wordBuffer[1])) || (wordBuffer[1] == '*')) && (wordBuffer[wbo] != '%')) { // Check for External Command / Program if (cmdLoc == offset - wbl) { @@ 
-365,6 +331,17 @@ static void ColouriseBatchLine( styler.ColourTo(startLine + offset - 1 - (wbl - 2), SCE_BAT_IDENTIFIER); // Reset Offset to re-process remainder of word offset -= (wbl - 2); + // Check for Expanded Argument (%~...) / Variable (%%~...) + } else if (((wbl > 1) && (wordBuffer[1] == '~')) || + ((wbl > 2) && (wordBuffer[1] == '%') && (wordBuffer[2] == '~'))) { + // Check for External Command / Program + if (cmdLoc == offset - wbl) { + cmdLoc = offset - (wbl - wbo); + } + // Colorize Expanded Argument / Variable + styler.ColourTo(startLine + offset - 1 - (wbl - wbo), SCE_BAT_IDENTIFIER); + // Reset Offset to re-process remainder of word + offset -= (wbl - wbo); // Check for Environment Variable (%x...%) } else if ((wordBuffer[1] != '%') && (wordBuffer[wbo] == '%')) { @@ -495,9 +472,13 @@ static void ColouriseDiffLine(char *lineBuffer, int endLine, Accessor &styler) { // otherwise it is considered a comment (Only in..., Binary file...) if (0 == strncmp(lineBuffer, "diff ", 5)) { styler.ColourTo(endLine, SCE_DIFF_COMMAND); - } else if (0 == strncmp(lineBuffer, "--- ", 4)) { + } else if (0 == strncmp(lineBuffer, "Index: ", 7)) { // For subversion's diff + styler.ColourTo(endLine, SCE_DIFF_COMMAND); + } else if (0 == strncmp(lineBuffer, "---", 3)) { // In a context diff, --- appears in both the header and the position markers - if (atoi(lineBuffer+4) && !strchr(lineBuffer, '/')) + if (lineBuffer[3] == ' ' && atoi(lineBuffer + 4) && !strchr(lineBuffer, '/')) + styler.ColourTo(endLine, SCE_DIFF_POSITION); + else if (lineBuffer[3] == '\r' || lineBuffer[3] == '\n') styler.ColourTo(endLine, SCE_DIFF_POSITION); else styler.ColourTo(endLine, SCE_DIFF_HEADER); @@ -530,6 +511,8 @@ static void ColouriseDiffLine(char *lineBuffer, int endLine, Accessor &styler) { styler.ColourTo(endLine, SCE_DIFF_DELETED); } else if (lineBuffer[0] == '+' || lineBuffer[0] == '>') { styler.ColourTo(endLine, SCE_DIFF_ADDED); + } else if (lineBuffer[0] == '!') { + styler.ColourTo(endLine, 
SCE_DIFF_CHANGED); } else if (lineBuffer[0] != ' ') { styler.ColourTo(endLine, SCE_DIFF_COMMENT); } else { @@ -556,25 +539,24 @@ static void ColouriseDiffDoc(unsigned int startPos, int length, int, WordList *[ } } -static void FoldDiffDoc(unsigned int startPos, int length, int, WordList*[], Accessor &styler) { +static void FoldDiffDoc(unsigned int startPos, int length, int, WordList *[], Accessor &styler) { int curLine = styler.GetLine(startPos); - int prevLevel = SC_FOLDLEVELBASE; - if (curLine > 0) - prevLevel = styler.LevelAt(curLine-1); - int curLineStart = styler.LineStart(curLine); - do { - int nextLevel = prevLevel; - if (prevLevel & SC_FOLDLEVELHEADERFLAG) - nextLevel = (prevLevel & SC_FOLDLEVELNUMBERMASK) + 1; + int prevLevel = curLine > 0 ? styler.LevelAt(curLine - 1) : SC_FOLDLEVELBASE; + int nextLevel; + do { int lineType = styler.StyleAt(curLineStart); if (lineType == SCE_DIFF_COMMAND) + nextLevel = SC_FOLDLEVELBASE | SC_FOLDLEVELHEADERFLAG; + else if (lineType == SCE_DIFF_HEADER) nextLevel = (SC_FOLDLEVELBASE + 1) | SC_FOLDLEVELHEADERFLAG; - else if (lineType == SCE_DIFF_HEADER) { + else if (lineType == SCE_DIFF_POSITION && styler[curLineStart] != '-') nextLevel = (SC_FOLDLEVELBASE + 2) | SC_FOLDLEVELHEADERFLAG; - } else if (lineType == SCE_DIFF_POSITION) - nextLevel = (SC_FOLDLEVELBASE + 3) | SC_FOLDLEVELHEADERFLAG; + else if (prevLevel & SC_FOLDLEVELHEADERFLAG) + nextLevel = (prevLevel & SC_FOLDLEVELNUMBERMASK) + 1; + else + nextLevel = prevLevel; if ((nextLevel & SC_FOLDLEVELHEADERFLAG) && (nextLevel == prevLevel)) styler.SetLevel(curLine-1, prevLevel & ~SC_FOLDLEVELHEADERFLAG); @@ -658,6 +640,7 @@ static void ColourisePoDoc(unsigned int startPos, int length, int, WordList *[], } } + static void ColourisePropsLine( char *lineBuffer, unsigned int lengthLine, @@ -993,7 +976,7 @@ static int RecogniseErrorListLine(const char *lineBuffer, unsigned int lengthLin if ((chNext != '\\') && (chNext != '/') && (chNext != ' ')) { // This check is not completely 
accurate as may be on // GTK+ with a file name that includes ':'. - state = stGccStart; + state = stGccStart; } else if (chNext == ' ') { // indicates a Lua 5.1 error message initialColonPart = true; } diff --git a/scintilla/LexPerl.cxx b/scintilla/LexPerl.cxx index ff8d771c..f57f73c2 100644 --- a/scintilla/LexPerl.cxx +++ b/scintilla/LexPerl.cxx @@ -1,6 +1,6 @@ // Scintilla source code edit control /** @file LexPerl.cxx - ** Lexer for subset of Perl. + ** Lexer for Perl. **/ // Copyright 1998-2008 by Neil Hodgson // Lexical analysis fixes by Kein-Hong Man @@ -16,166 +16,336 @@ #include "PropSet.h" #include "Accessor.h" +#include "StyleContext.h" #include "KeyWords.h" #include "Scintilla.h" #include "SciLexer.h" +#include "CharacterSet.h" #ifdef SCI_NAMESPACE using namespace Scintilla; #endif -#define PERLNUM_BINARY 1 // order is significant: 1-4 cannot have a dot -#define PERLNUM_HEX 2 -#define PERLNUM_OCTAL 3 -#define PERLNUM_FLOAT 4 // actually exponent part -#define PERLNUM_DECIMAL 5 // 1-5 are numbers; 6-7 are strings -#define PERLNUM_VECTOR 6 -#define PERLNUM_V_VECTOR 7 -#define PERLNUM_BAD 8 +// Info for HERE document handling from perldata.pod (reformatted): +// ---------------------------------------------------------------- +// A line-oriented form of quoting is based on the shell ``here-doc'' syntax. +// Following a << you specify a string to terminate the quoted material, and +// all lines following the current line down to the terminating string are +// the value of the item. +// * The terminating string may be either an identifier (a word), or some +// quoted text. +// * If quoted, the type of quotes you use determines the treatment of the +// text, just as in regular quoting. +// * An unquoted identifier works like double quotes. +// * There must be no space between the << and the identifier. +// (If you put a space it will be treated as a null identifier, +// which is valid, and matches the first empty line.) 
+// (This is deprecated, -w warns of this syntax) +// * The terminating string must appear by itself (unquoted and +// with no surrounding whitespace) on the terminating line. -#define BACK_NONE 0 // lookback state for bareword disambiguation: -#define BACK_OPERATOR 1 // whitespace/comments are insignificant -#define BACK_KEYWORD 2 // operators/keywords are needed for disambiguation +#define HERE_DELIM_MAX 256 // maximum length of HERE doc delimiter -#define HERE_DELIM_MAX 256 +#define PERLNUM_BINARY 1 // order is significant: 1-4 cannot have a dot +#define PERLNUM_HEX 2 +#define PERLNUM_OCTAL 3 +#define PERLNUM_FLOAT_EXP 4 // exponent part only +#define PERLNUM_DECIMAL 5 // 1-5 are numbers; 6-7 are strings +#define PERLNUM_VECTOR 6 +#define PERLNUM_V_VECTOR 7 +#define PERLNUM_BAD 8 -static inline bool isEOLChar(char ch) { - return (ch == '\r') || (ch == '\n'); -} +#define BACK_NONE 0 // lookback state for bareword disambiguation: +#define BACK_OPERATOR 1 // whitespace/comments are insignificant +#define BACK_KEYWORD 2 // operators/keywords are needed for disambiguation -static bool isSingleCharOp(char ch) { - char strCharSet[2]; - strCharSet[0] = ch; - strCharSet[1] = '\0'; - return (NULL != strstr("rwxoRWXOezsfdlpSbctugkTBMAC", strCharSet)); -} - -static inline bool isPerlOperator(char ch) { - if (ch == '^' || ch == '&' || ch == '\\' || - ch == '(' || ch == ')' || ch == '-' || ch == '+' || - ch == '=' || ch == '|' || ch == '{' || ch == '}' || - ch == '[' || ch == ']' || ch == ':' || ch == ';' || - ch == '>' || ch == ',' || - ch == '?' || ch == '!' || ch == '.' 
|| ch == '~') - return true; - // these chars are already tested before this call - // ch == '%' || ch == '*' || ch == '<' || ch == '/' || - return false; -} - -static bool isPerlKeyword(unsigned int start, unsigned int end, WordList &keywords, Accessor &styler) { +static bool isPerlKeyword(unsigned int start, unsigned int end, WordList &keywords, Accessor &styler) +{ + // old-style keyword matcher; needed because GetCurrent() needs + // current segment to be committed, but we may abandon early... char s[100]; - unsigned int i, len = end - start; - if (len > 30) { len = 30; } + unsigned int i, len = end - start; + if (len > 30) { len = 30; } for (i = 0; i < len; i++, start++) s[i] = styler[start]; - s[i] = '\0'; + s[i] = '\0'; return keywords.InList(s); } -// Note: as lexer uses chars, UTF-8 bytes are considered as <0 values -// Note: iswordchar() was used in only one place in LexPerl, it is -// unnecessary as '.' is processed as the concatenation operator, so -// only isWordStart() is used in LexPerl - -static inline bool isWordStart(char ch) { - return !isascii(ch) || isalnum(ch) || ch == '_'; -} - -static inline bool isEndVar(char ch) { - return isascii(ch) && !isalnum(ch) && ch != '#' && ch != '$' && - ch != '_' && ch != '\''; -} - -static inline bool isNonQuote(char ch) { - return !isascii(ch) || isalnum(ch) || ch == '_'; -} - -static inline char actualNumStyle(int numberStyle) { - if (numberStyle == PERLNUM_VECTOR || numberStyle == PERLNUM_V_VECTOR) { - return SCE_PL_STRING; - } else if (numberStyle == PERLNUM_BAD) { - return SCE_PL_ERROR; - } - return SCE_PL_NUMBER; -} - -static bool isMatch(Accessor &styler, int lengthDoc, int pos, const char *val) { - if ((pos + static_cast(strlen(val))) >= lengthDoc) { - return false; +static int disambiguateBareword(Accessor &styler, unsigned int bk, unsigned int fw, + int backFlag, unsigned int backPos, unsigned int endPos) +{ + // identifiers are recognized by Perl as barewords under some + // conditions, the following 
attempts to do the disambiguation + // by looking backward and forward; result in 2 LSB + int result = 0; + bool moreback = false; // true if passed newline/comments + bool brace = false; // true if opening brace found + // if BACK_NONE, neither operator nor keyword, so skip test + if (backFlag == BACK_NONE) + return result; + // first look backwards past whitespace/comments to set EOL flag + // (some disambiguation patterns must be on a single line) + if (backPos <= static_cast(styler.LineStart(styler.GetLine(bk)))) + moreback = true; + // look backwards at last significant lexed item for disambiguation + bk = backPos - 1; + int ch = static_cast(styler.SafeGetCharAt(bk)); + if (ch == '{' && !moreback) { + // {bareword: possible variable spec + brace = true; + } else if ((ch == '&' && styler.SafeGetCharAt(bk - 1) != '&') + // &bareword: subroutine call + || styler.Match(bk - 1, "->") + // ->bareword: part of variable spec + || styler.Match(bk - 2, "sub")) { + // sub bareword: subroutine declaration + // (implied BACK_KEYWORD, no keywords end in 'sub'!) 
+ result |= 1; } - while (*val) { - if (*val != styler[pos++]) { - return false; + // next, scan forward after word past tab/spaces only; + // if ch isn't one of '[{(,' we can skip the test + if ((ch == '{' || ch == '(' || ch == '['|| ch == ',') + && fw < endPos) { + while (ch = static_cast<unsigned char>(styler.SafeGetCharAt(fw)), + IsASpaceOrTab(ch) && fw < endPos) { + fw++; + } + if ((ch == '}' && brace) + // {bareword}: variable spec + || styler.Match(fw, "=>")) { + // [{(, bareword=>: hash literal + result |= 2; } - val++; } + return result; +} + +static void skipWhitespaceComment(Accessor &styler, unsigned int &p) +{ + // when backtracking, we need to skip whitespace and comments + int style; + while ((p > 0) && (style = styler.StyleAt(p), + style == SCE_PL_DEFAULT || style == SCE_PL_COMMENTLINE)) + p--; +} + +static int styleBeforeBracePair(Accessor &styler, unsigned int bk) +{ + // backtrack to find open '{' corresponding to a '}', balanced + // return significant style to be tested for '/' disambiguation + int braceCount = 1; + if (bk == 0) + return SCE_PL_DEFAULT; + while (--bk > 0) { + if (styler.StyleAt(bk) == SCE_PL_OPERATOR) { + int bkch = static_cast<unsigned char>(styler.SafeGetCharAt(bk)); + if (bkch == ';') { // early out + break; + } else if (bkch == '}') { + braceCount++; + } else if (bkch == '{') { + if (--braceCount == 0) break; + } + } + } + if (bk > 0 && braceCount == 0) { + // balanced { found, bk > 0, skip more whitespace/comments + bk--; + skipWhitespaceComment(styler, bk); + return styler.StyleAt(bk); + } + return SCE_PL_DEFAULT; +} + +static int styleCheckIdentifier(Accessor &styler, unsigned int bk) +{ + // backtrack to classify sub-styles of identifier under test + // return sub-style to be tested for '/' disambiguation + if (styler.SafeGetCharAt(bk) == '>') // inputsymbol, like <foo> + return 1; + // backtrack to check for possible "->" or "::" before identifier + while (bk > 0 && styler.StyleAt(bk) == SCE_PL_IDENTIFIER) { + bk--; + } + while (bk > 0) { + int bkstyle = 
styler.StyleAt(bk); + if (bkstyle == SCE_PL_DEFAULT + || bkstyle == SCE_PL_COMMENTLINE) { + // skip whitespace, comments + } else if (bkstyle == SCE_PL_OPERATOR) { + // test for "->" and "::" + if (styler.Match(bk - 1, "->") || styler.Match(bk - 1, "::")) + return 2; + } else + return 3; // bare identifier + bk--; + } + return 0; +} + +static int inputsymbolScan(Accessor &styler, unsigned int pos, unsigned int endPos) +{ + // looks forward for matching > on same line; a bit ugly + unsigned int fw = pos; + while (++fw < endPos) { + int fwch = static_cast<unsigned char>(styler.SafeGetCharAt(fw)); + if (fwch == '\r' || fwch == '\n') { + return 0; + } else if (fwch == '>') { + if (styler.Match(fw - 2, "<=>")) // '<=>' case + return 0; + return fw - pos; + } + } + return 0; +} + +static int podLineScan(Accessor &styler, unsigned int &pos, unsigned int endPos) +{ + // forward scan the current line to classify line for POD style + int state = -1; + while (pos <= endPos) { + int ch = static_cast<unsigned char>(styler.SafeGetCharAt(pos)); + if (ch == '\n' || ch == '\r' || pos >= endPos) { + if (ch == '\r' && styler.SafeGetCharAt(pos + 1) == '\n') pos++; + break; + } + if (IsASpaceOrTab(ch)) { // whitespace, take note + if (state == -1) + state = SCE_PL_DEFAULT; + } else if (state == SCE_PL_DEFAULT) { // verbatim POD line + state = SCE_PL_POD_VERB; + } else if (state != SCE_PL_POD_VERB) { // regular POD line + state = SCE_PL_POD; + } + pos++; + } + if (state == -1) + state = SCE_PL_DEFAULT; + return state; +} + +static bool styleCheckSubPrototype(Accessor &styler, unsigned int bk) +{ + // backtrack to identify if we're starting a subroutine prototype + // we also need to ignore whitespace/comments: + // 'sub' [whitespace|comment] <identifier> [whitespace|comment] + styler.Flush(); + skipWhitespaceComment(styler, bk); + if (bk == 0 || styler.StyleAt(bk) != SCE_PL_IDENTIFIER) // check identifier + return false; + while (bk > 0 && (styler.StyleAt(bk) == SCE_PL_IDENTIFIER)) { + bk--; + } + skipWhitespaceComment(styler, 
bk); + if (bk < 2 || styler.StyleAt(bk) != SCE_PL_WORD // check "sub" keyword + || !styler.Match(bk - 2, "sub")) // assume suffix is unique! + return false; return true; } -static char opposite(char ch) { - if (ch == '(') - return ')'; - if (ch == '[') - return ']'; - if (ch == '{') - return '}'; - if (ch == '<') - return '>'; +static bool isMatch(const char *sref, char *s) +{ + // match per-line delimiter - must kill trailing CR if CRLF + if (s[strlen(s) - 1] == '\r') + s[strlen(s) - 1] = '\0'; + return (strcmp(sref, s) == 0); +} + +static int actualNumStyle(int numberStyle) { + if (numberStyle == PERLNUM_VECTOR || numberStyle == PERLNUM_V_VECTOR) { + return SCE_PL_STRING; + } else if (numberStyle == PERLNUM_BAD) { + return SCE_PL_ERROR; + } + return SCE_PL_NUMBER; +} + +static int opposite(int ch) { + if (ch == '(') return ')'; + if (ch == '[') return ']'; + if (ch == '{') return '}'; + if (ch == '<') return '>'; return ch; } static void ColourisePerlDoc(unsigned int startPos, int length, int initStyle, WordList *keywordlists[], Accessor &styler) { - // Lexer for perl often has to backtrack to start of current style to determine - // which characters are being used as quotes, how deeply nested is the - // start position and what the termination string is for here documents - WordList &keywords = *keywordlists[0]; - // keywords that forces /PATTERN/ at all times - WordList reWords; - reWords.Set("elsif if split while"); + // keywords that forces /PATTERN/ at all times; should track vim's behaviour + WordList reWords; + reWords.Set("elsif if split while"); - class HereDocCls { + // charset classes + CharacterSet setWordStart(CharacterSet::setAlpha, "_", 0x80, true); + CharacterSet setWord(CharacterSet::setAlphaNum, "_", 0x80, true); + CharacterSet setSingleCharOp(CharacterSet::setNone, "rwxoRWXOezsfdlpSbctugkTBMAC"); + // lexing of "%*</" operators is non-trivial; these are missing in the set below + CharacterSet setPerlOperator(CharacterSet::setNone, "^&\\()-+=|{}[]:;>,?!.~"); + CharacterSet setQDelim(CharacterSet::setNone, "qrwx"); + CharacterSet setModifiers(CharacterSet::setAlpha); + 
CharacterSet setPreferRE(CharacterSet::setNone, "*/<%"); + // setArray and setHash also accepts chars for special vars like $_, + // which are then truncated when the next char does not match setVar + CharacterSet setVar(CharacterSet::setAlphaNum, "#$_'", 0x80, true); + CharacterSet setArray(CharacterSet::setAlpha, "#$_+-", 0x80, true); + CharacterSet setHash(CharacterSet::setAlpha, "#$_!^+-", 0x80, true); + CharacterSet &setPOD = setModifiers; + CharacterSet setNonHereDoc(CharacterSet::setDigits, "=$@"); + CharacterSet setHereDocDelim(CharacterSet::setAlphaNum, "_"); + CharacterSet setSubPrototype(CharacterSet::setNone, "\\[$@%&*];"); + // for format identifiers + CharacterSet setFormatStart(CharacterSet::setAlpha, "_="); + CharacterSet &setFormat = setHereDocDelim; + + // Lexer for perl often has to backtrack to start of current style to determine + // which characters are being used as quotes, how deeply nested is the + // start position and what the termination string is for HERE documents. 
+ + class HereDocCls { // Class to manage HERE doc sequence public: int State; // 0: '<<' encountered - // 1: collect the delimiter - // 2: here doc text (lines after the delimiter) - char Quote; // the char after '<<' + // 1: collect the delimiter + // 2: here doc text (lines after the delimiter) + int Quote; // the char after '<<' bool Quoted; // true if Quote in ('\'','"','`') int DelimiterLength; // strlen(Delimiter) char *Delimiter; // the Delimiter, 256: sizeof PL_tokenbuf HereDocCls() { State = 0; - Quote = 0; - Quoted = false; + Quote = 0; + Quoted = false; DelimiterLength = 0; Delimiter = new char[HERE_DELIM_MAX]; Delimiter[0] = '\0'; } + void Append(int ch) { + Delimiter[DelimiterLength++] = static_cast(ch); + Delimiter[DelimiterLength] = '\0'; + } ~HereDocCls() { delete []Delimiter; } }; - HereDocCls HereDoc; // TODO: FIFO for stacked here-docs + HereDocCls HereDoc; // TODO: FIFO for stacked here-docs - class QuoteCls { + class QuoteCls { // Class to manage quote pairs public: - int Rep; - int Count; - char Up; - char Down; + int Rep; + int Count; + int Up, Down; QuoteCls() { this->New(1); } - void New(int r) { + void New(int r = 1) { Rep = r; Count = 0; Up = '\0'; Down = '\0'; } - void Open(char u) { + void Open(int u) { Count++; Up = u; Down = opposite(Up); @@ -183,525 +353,700 @@ static void ColourisePerlDoc(unsigned int startPos, int length, int initStyle, }; QuoteCls Quote; - int state = initStyle; - char numState = PERLNUM_DECIMAL; + // additional state for number lexing + int numState = PERLNUM_DECIMAL; int dotCount = 0; - unsigned int lengthDoc = startPos + length; - //int sookedpos = 0; // these have no apparent use, see POD state - //char sooked[100]; - //sooked[sookedpos] = '\0'; - styler.StartAt(startPos, static_cast(STYLE_MAX)); - // If in a long distance lexical state, seek to the beginning to find quote characters - // Perl strings can be multi-line with embedded newlines, so backtrack. 
- // Perl numbers have additional state during lexing, so backtrack too. - if (state == SCE_PL_HERE_Q || state == SCE_PL_HERE_QQ || state == SCE_PL_HERE_QX) { - while ((startPos > 1) && (styler.StyleAt(startPos) != SCE_PL_HERE_DELIM)) { - startPos--; - } - startPos = styler.LineStart(styler.GetLine(startPos)); - state = styler.StyleAt(startPos - 1); - } - // Backtrack for format body. - if (state == SCE_PL_FORMAT) { - while ((startPos > 1) && (styler.StyleAt(startPos) != SCE_PL_FORMAT_IDENT)) { - startPos--; - } - startPos = styler.LineStart(styler.GetLine(startPos)); - state = styler.StyleAt(startPos - 1); - } - if ( state == SCE_PL_STRING_Q - || state == SCE_PL_STRING_QQ - || state == SCE_PL_STRING_QX - || state == SCE_PL_STRING_QR - || state == SCE_PL_STRING_QW - || state == SCE_PL_REGEX - || state == SCE_PL_REGSUBST - || state == SCE_PL_STRING - || state == SCE_PL_BACKTICKS - || state == SCE_PL_CHARACTER - || state == SCE_PL_NUMBER - || state == SCE_PL_IDENTIFIER - || state == SCE_PL_ERROR - || state == SCE_PL_SUB_PROTOTYPE + unsigned int endPos = startPos + length; + + // Backtrack to beginning of style if required... + // If in a long distance lexical state, backtrack to find quote characters. + // Includes strings (may be multi-line), numbers (additional state), format + // bodies, as well as POD sections. + if (initStyle == SCE_PL_HERE_Q + || initStyle == SCE_PL_HERE_QQ + || initStyle == SCE_PL_HERE_QX + || initStyle == SCE_PL_FORMAT ) { - while ((startPos > 1) && (styler.StyleAt(startPos - 1) == state)) { + int delim = (initStyle == SCE_PL_FORMAT) ? 
SCE_PL_FORMAT_IDENT:SCE_PL_HERE_DELIM; + while ((startPos > 1) && (styler.StyleAt(startPos) != delim)) { startPos--; } - state = SCE_PL_DEFAULT; + startPos = styler.LineStart(styler.GetLine(startPos)); + initStyle = styler.StyleAt(startPos - 1); + } + if (initStyle == SCE_PL_STRING_Q + || initStyle == SCE_PL_STRING_QQ + || initStyle == SCE_PL_STRING_QX + || initStyle == SCE_PL_STRING_QR + || initStyle == SCE_PL_STRING_QW + || initStyle == SCE_PL_REGEX + || initStyle == SCE_PL_REGSUBST + || initStyle == SCE_PL_STRING + || initStyle == SCE_PL_BACKTICKS + || initStyle == SCE_PL_CHARACTER + || initStyle == SCE_PL_NUMBER + || initStyle == SCE_PL_IDENTIFIER + || initStyle == SCE_PL_ERROR + || initStyle == SCE_PL_SUB_PROTOTYPE + ) { + while ((startPos > 1) && (styler.StyleAt(startPos - 1) == initStyle)) { + startPos--; + } + initStyle = SCE_PL_DEFAULT; + } else if (initStyle == SCE_PL_POD + || initStyle == SCE_PL_POD_VERB + ) { + // POD backtracking finds preceeding blank lines and goes back past them + int ln = styler.GetLine(startPos); + if (ln > 0) { + initStyle = styler.StyleAt(styler.LineStart(--ln)); + if (initStyle == SCE_PL_POD || initStyle == SCE_PL_POD_VERB) { + while (ln > 0 && styler.GetLineState(ln) == SCE_PL_DEFAULT) + ln--; + } + startPos = styler.LineStart(++ln); + initStyle = styler.StyleAt(startPos - 1); + } else { + startPos = 0; + initStyle = SCE_PL_DEFAULT; + } } - // lookback at start of lexing to set proper state for backflag - // after this, they are updated when elements are lexed - int backflag = BACK_NONE; - unsigned int backPos = startPos; - if (backPos > 0) { - backPos--; - int sty = SCE_PL_DEFAULT; - while ((backPos > 0) && (sty = styler.StyleAt(backPos), - sty == SCE_PL_DEFAULT || sty == SCE_PL_COMMENTLINE)) - backPos--; - if (sty == SCE_PL_OPERATOR) - backflag = BACK_OPERATOR; - else if (sty == SCE_PL_WORD) - backflag = BACK_KEYWORD; - } + // backFlag, backPos are additional state to aid identifier corner cases. 
+ // Look backwards past whitespace and comments in order to detect either + // operator or keyword. Later updated as we go along. + int backFlag = BACK_NONE; + unsigned int backPos = startPos; + if (backPos > 0) { + backPos--; + skipWhitespaceComment(styler, backPos); + if (styler.StyleAt(backPos) == SCE_PL_OPERATOR) + backFlag = BACK_OPERATOR; + else if (styler.StyleAt(backPos) == SCE_PL_WORD) + backFlag = BACK_KEYWORD; + backPos++; + } - styler.StartAt(startPos, static_cast(STYLE_MAX)); - char chPrev = styler.SafeGetCharAt(startPos - 1); - if (startPos == 0) - chPrev = '\n'; - char chNext = styler[startPos]; - styler.StartSegment(startPos); + StyleContext sc(startPos, endPos - startPos, initStyle, styler, static_cast(STYLE_MAX)); - for (unsigned int i = startPos; i < lengthDoc; i++) { - char ch = chNext; - // if the current character is not consumed due to the completion of an - // earlier style, lexing can be restarted via a simple goto - restartLexer: - chNext = styler.SafeGetCharAt(i + 1); - char chNext2 = styler.SafeGetCharAt(i + 2); + for (; sc.More(); sc.Forward()) { - if (styler.IsLeadByte(ch)) { - chNext = styler.SafeGetCharAt(i + 2); - chPrev = ' '; - i += 1; - continue; + // Determine if the current state should terminate. 
+ switch (sc.state) { + case SCE_PL_OPERATOR: + sc.SetState(SCE_PL_DEFAULT); + backFlag = BACK_OPERATOR; + backPos = sc.currentPos; + break; + case SCE_PL_IDENTIFIER: // identifier, bareword, inputsymbol + if ((!setWord.Contains(sc.ch) && sc.ch != '\'') + || sc.Match('.', '.') + || sc.chPrev == '>') { // end of inputsymbol + sc.SetState(SCE_PL_DEFAULT); + } + break; + case SCE_PL_WORD: // keyword, plus special cases + if (!setWord.Contains(sc.ch)) { + char s[100]; + sc.GetCurrent(s, sizeof(s)); + if ((strcmp(s, "__DATA__") == 0) || (strcmp(s, "__END__") == 0)) { + sc.ChangeState(SCE_PL_DATASECTION); + } else { + if ((strcmp(s, "format") == 0)) { + sc.SetState(SCE_PL_FORMAT_IDENT); + HereDoc.State = 0; + } else { + sc.SetState(SCE_PL_DEFAULT); + } + backFlag = BACK_KEYWORD; + backPos = sc.currentPos; + } + } + break; + case SCE_PL_SCALAR: + case SCE_PL_ARRAY: + case SCE_PL_HASH: + case SCE_PL_SYMBOLTABLE: + if (sc.Match(':', ':')) { // skip :: + sc.Forward(); + } else if (!setVar.Contains(sc.ch)) { + if (sc.LengthCurrent() == 1) { + // Special variable: $(, $_ etc. 
+ sc.Forward(); + } + sc.SetState(SCE_PL_DEFAULT); + } + break; + case SCE_PL_NUMBER: + // if no early break, number style is terminated at "(go through)" + if (sc.ch == '.') { + if (sc.chNext == '.') { + // double dot is always an operator (go through) + } else if (numState <= PERLNUM_FLOAT_EXP) { + // non-decimal number or float exponent, consume next dot + sc.SetState(SCE_PL_OPERATOR); + break; + } else { // decimal or vectors allows dots + dotCount++; + if (numState == PERLNUM_DECIMAL) { + if (dotCount <= 1) // number with one dot in it + break; + if (IsADigit(sc.chNext)) { // really a vector + numState = PERLNUM_VECTOR; + break; + } + // number then dot (go through) + } else if (IsADigit(sc.chNext)) // vectors + break; + // vector then dot (go through) + } + } else if (sc.ch == '_') { + // permissive underscoring for number and vector literals + break; + } else if (numState == PERLNUM_DECIMAL) { + if (sc.ch == 'E' || sc.ch == 'e') { // exponent, sign + numState = PERLNUM_FLOAT_EXP; + if (sc.chNext == '+' || sc.chNext == '-') { + sc.Forward(); + } + break; + } else if (IsADigit(sc.ch)) + break; + // number then word (go through) + } else if (numState == PERLNUM_HEX) { + if (IsADigit(sc.ch, 16)) + break; + } else if (numState == PERLNUM_VECTOR || numState == PERLNUM_V_VECTOR) { + if (IsADigit(sc.ch)) // vector + break; + if (setWord.Contains(sc.ch) && dotCount == 0) { // change to word + sc.ChangeState(SCE_PL_IDENTIFIER); + break; + } + // vector then word (go through) + } else if (IsADigit(sc.ch)) { + if (numState == PERLNUM_FLOAT_EXP) { + break; + } else if (numState == PERLNUM_OCTAL) { + if (sc.ch <= '7') break; + } else if (numState == PERLNUM_BINARY) { + if (sc.ch <= '1') break; + } + // mark invalid octal, binary numbers (go through) + numState = PERLNUM_BAD; + break; + } + // complete current number or vector + sc.ChangeState(actualNumStyle(numState)); + sc.SetState(SCE_PL_DEFAULT); + break; + case SCE_PL_COMMENTLINE: + if (sc.atLineEnd) { + 
sc.SetState(SCE_PL_DEFAULT); + } + break; + case SCE_PL_HERE_DELIM: + if (HereDoc.State == 0) { // '<<' encountered + int delim_ch = sc.chNext; + int ws_skip = 0; + HereDoc.State = 1; // pre-init HERE doc class + HereDoc.Quote = sc.chNext; + HereDoc.Quoted = false; + HereDoc.DelimiterLength = 0; + HereDoc.Delimiter[HereDoc.DelimiterLength] = '\0'; + if (IsASpaceOrTab(delim_ch)) { + // skip whitespace; legal only for quoted delimiters + unsigned int i = sc.currentPos + 1; + while ((i < endPos) && IsASpaceOrTab(delim_ch)) { + i++; + delim_ch = static_cast(styler.SafeGetCharAt(i)); + } + ws_skip = i - sc.currentPos - 1; + } + if (delim_ch == '\'' || delim_ch == '"' || delim_ch == '`') { + // a quoted here-doc delimiter; skip any whitespace + sc.Forward(ws_skip + 1); + HereDoc.Quote = delim_ch; + HereDoc.Quoted = true; + } else if (ws_skip == 0 && setNonHereDoc.Contains(sc.chNext) + || ws_skip > 0) { + // left shift << or <<= operator cases + // restore position if operator + sc.ChangeState(SCE_PL_OPERATOR); + sc.ForwardSetState(SCE_PL_DEFAULT); + backFlag = BACK_OPERATOR; + backPos = sc.currentPos; + HereDoc.State = 0; + } else { + // specially handle initial '\' for identifier + if (ws_skip == 0 && HereDoc.Quote == '\\') + sc.Forward(); + // an unquoted here-doc delimiter, no special handling + // (cannot be prefixed by spaces/tabs), or + // symbols terminates; deprecated zero-length delimiter + } + } else if (HereDoc.State == 1) { // collect the delimiter + backFlag = BACK_NONE; + if (HereDoc.Quoted) { // a quoted here-doc delimiter + if (sc.ch == HereDoc.Quote) { // closing quote => end of delimiter + sc.ForwardSetState(SCE_PL_DEFAULT); + } else if (!sc.atLineEnd) { + if (sc.Match('\\', static_cast(HereDoc.Quote))) { // escaped quote + sc.Forward(); + } + if (sc.ch != '\r') { // skip CR if CRLF + HereDoc.Append(sc.ch); + } + } + } else { // an unquoted here-doc delimiter + if (setHereDocDelim.Contains(sc.ch)) { + HereDoc.Append(sc.ch); + } else { + 
sc.SetState(SCE_PL_DEFAULT); + } + } + if (HereDoc.DelimiterLength >= HERE_DELIM_MAX - 1) { + sc.SetState(SCE_PL_ERROR); + HereDoc.State = 0; + } + } + break; + case SCE_PL_HERE_Q: + case SCE_PL_HERE_QQ: + case SCE_PL_HERE_QX: { + // also implies HereDoc.State == 2 + sc.Complete(); + while (!sc.atLineEnd) + sc.Forward(); + char s[HERE_DELIM_MAX]; + sc.GetCurrent(s, sizeof(s)); + if (isMatch(HereDoc.Delimiter, s)) { + sc.SetState(SCE_PL_DEFAULT); + backFlag = BACK_NONE; + HereDoc.State = 0; + } + } break; + case SCE_PL_POD: + case SCE_PL_POD_VERB: { + unsigned int fw = sc.currentPos; + int ln = styler.GetLine(fw); + if (sc.atLineStart && sc.Match("=cut")) { // end of POD + sc.SetState(SCE_PL_POD); + sc.Forward(4); + sc.SetState(SCE_PL_DEFAULT); + styler.SetLineState(ln, SCE_PL_POD); + break; + } + int pod = podLineScan(styler, fw, endPos); // classify POD line + styler.SetLineState(ln, pod); + if (pod == SCE_PL_DEFAULT) { + if (sc.state == SCE_PL_POD_VERB) { + unsigned int fw2 = fw; + while (fw2 <= endPos && pod == SCE_PL_DEFAULT) { + fw = fw2++; // penultimate line (last blank line) + pod = podLineScan(styler, fw2, endPos); + styler.SetLineState(styler.GetLine(fw2), pod); + } + if (pod == SCE_PL_POD) { // truncate verbatim POD early + sc.SetState(SCE_PL_POD); + } else + fw = fw2; + } else + pod = SCE_PL_POD; + } else { + if (pod == SCE_PL_POD_VERB // still part of current paragraph + && (styler.GetLineState(ln - 1) == SCE_PL_POD)) { + pod = SCE_PL_POD; + styler.SetLineState(ln, pod); + } else if (pod == SCE_PL_POD + && (styler.GetLineState(ln - 1) == SCE_PL_POD_VERB)) { + pod = SCE_PL_POD_VERB; + styler.SetLineState(ln, pod); + } + sc.SetState(pod); + } + sc.Forward(fw - sc.currentPos); // commit style + } break; + case SCE_PL_REGEX: + case SCE_PL_STRING_QR: + if (Quote.Rep <= 0) { + if (!setModifiers.Contains(sc.ch)) + sc.SetState(SCE_PL_DEFAULT); + } else if (!Quote.Up && !IsASpace(sc.ch)) { + Quote.Open(sc.ch); + } else if (sc.ch == '\\' && Quote.Up != '\\') { + 
sc.Forward(); + } else if (sc.ch == Quote.Down) { + Quote.Count--; + if (Quote.Count == 0) + Quote.Rep--; + } else if (sc.ch == Quote.Up) { + Quote.Count++; + } + break; + case SCE_PL_REGSUBST: + if (Quote.Rep <= 0) { + if (!setModifiers.Contains(sc.ch)) + sc.SetState(SCE_PL_DEFAULT); + } else if (!Quote.Up && !IsASpace(sc.ch)) { + Quote.Open(sc.ch); + } else if (sc.ch == '\\' && Quote.Up != '\\') { + sc.Forward(); + } else if (Quote.Count == 0 && Quote.Rep == 1) { + // We matched something like s(...) or tr{...}, Perl 5.10 + // appears to allow almost any character for use as the + // next delimiters. Whitespace and comments are accepted in + // between, but we'll limit to whitespace here. + // For '#', if no whitespace in between, it's a delimiter. + if (IsASpace(sc.ch)) { + // Keep going + } else if (sc.ch == '#' && IsASpaceOrTab(sc.chPrev)) { + sc.SetState(SCE_PL_DEFAULT); + } else { + Quote.Open(sc.ch); + } + } else if (sc.ch == Quote.Down) { + Quote.Count--; + if (Quote.Count == 0) + Quote.Rep--; + if (Quote.Up == Quote.Down) + Quote.Count++; + } else if (sc.ch == Quote.Up) { + Quote.Count++; + } + break; + case SCE_PL_STRING_Q: + case SCE_PL_STRING_QQ: + case SCE_PL_STRING_QX: + case SCE_PL_STRING_QW: + case SCE_PL_STRING: + case SCE_PL_CHARACTER: + case SCE_PL_BACKTICKS: + if (!Quote.Down && !IsASpace(sc.ch)) { + Quote.Open(sc.ch); + } else if (sc.ch == '\\' && Quote.Up != '\\') { + sc.Forward(); + } else if (sc.ch == Quote.Down) { + Quote.Count--; + if (Quote.Count == 0) + sc.ForwardSetState(SCE_PL_DEFAULT); + } else if (sc.ch == Quote.Up) { + Quote.Count++; + } + break; + case SCE_PL_SUB_PROTOTYPE: { + int i = 0; + // forward scan; must all be valid proto characters + while (setSubPrototype.Contains(sc.GetRelative(i))) + i++; + if (sc.GetRelative(i) == ')') { // valid sub prototype + sc.Forward(i); + sc.ForwardSetState(SCE_PL_DEFAULT); + } else { + // abandon prototype, restart from '(' + sc.ChangeState(SCE_PL_OPERATOR); + sc.SetState(SCE_PL_DEFAULT); + } 
+ } break; + case SCE_PL_FORMAT: { + sc.Complete(); + while (!sc.atLineEnd) + sc.Forward(); + char s[10]; + sc.GetCurrent(s, sizeof(s)); + if (isMatch(".", s)) + sc.SetState(SCE_PL_DEFAULT); + } break; + case SCE_PL_ERROR: + break; } - if ((chPrev == '\r' && ch == '\n')) { // skip on DOS/Windows - styler.ColourTo(i, state); - chPrev = ch; - continue; + // Needed for specific continuation styles (one follows the other) + switch (sc.state) { + // continued from SCE_PL_WORD + case SCE_PL_FORMAT_IDENT: + // occupies HereDoc state 3 to avoid clashing with HERE docs + if (IsASpaceOrTab(sc.ch)) { // skip whitespace + sc.ChangeState(SCE_PL_DEFAULT); + while (IsASpaceOrTab(sc.ch) && !sc.atLineEnd) + sc.Forward(); + sc.SetState(SCE_PL_FORMAT_IDENT); + } + if (setFormatStart.Contains(sc.ch)) { // identifier or '=' + if (sc.ch != '=') { + do { + sc.Forward(); + } while (setFormat.Contains(sc.ch)); + } + while (IsASpaceOrTab(sc.ch) && !sc.atLineEnd) + sc.Forward(); + if (sc.ch == '=') { + sc.ForwardSetState(SCE_PL_DEFAULT); + HereDoc.State = 3; + } else { + // invalid indentifier; inexact fallback, but hey + sc.ChangeState(SCE_PL_IDENTIFIER); + sc.SetState(SCE_PL_DEFAULT); + } + } else { + sc.ChangeState(SCE_PL_DEFAULT); // invalid indentifier + } + backFlag = BACK_NONE; + break; } - if (HereDoc.State == 1 && isEOLChar(ch)) { + // Must check end of HereDoc states here before default state is handled + if (HereDoc.State == 1 && sc.atLineEnd) { // Begin of here-doc (the line after the here-doc delimiter): // Lexically, the here-doc starts from the next line after the >>, but the // first line of here-doc seem to follow the style of the last EOL sequence + int st_new = SCE_PL_HERE_QQ; HereDoc.State = 2; if (HereDoc.Quoted) { - if (state == SCE_PL_HERE_DELIM) { + if (sc.state == SCE_PL_HERE_DELIM) { // Missing quote at end of string! We are stricter than perl. // Colour here-doc anyway while marking this bit as an error. 
- state = SCE_PL_ERROR; + sc.ChangeState(SCE_PL_ERROR); } - styler.ColourTo(i - 1, state); switch (HereDoc.Quote) { - case '\'': - state = SCE_PL_HERE_Q ; - break; - case '"': - state = SCE_PL_HERE_QQ; - break; - case '`': - state = SCE_PL_HERE_QX; - break; + case '\'': st_new = SCE_PL_HERE_Q ; break; + case '"' : st_new = SCE_PL_HERE_QQ; break; + case '`' : st_new = SCE_PL_HERE_QX; break; } } else { - styler.ColourTo(i - 1, state); - switch (HereDoc.Quote) { - case '\\': - state = SCE_PL_HERE_Q ; - break; - default : - state = SCE_PL_HERE_QQ; - } + if (HereDoc.Quote == '\\') + st_new = SCE_PL_HERE_Q; } + sc.SetState(st_new); + } + if (HereDoc.State == 3 && sc.atLineEnd) { + // Start of format body. + HereDoc.State = 0; + sc.SetState(SCE_PL_FORMAT); } - if (HereDoc.State == 4 && isEOLChar(ch)) { - // Start of format body. - HereDoc.State = 0; - styler.ColourTo(i - 1, state); - state = SCE_PL_FORMAT; - } - if (state == SCE_PL_DEFAULT) { - if ((isascii(ch) && isdigit(ch)) || (isascii(chNext) && isdigit(chNext) && - (ch == '.' || ch == 'v'))) { - state = SCE_PL_NUMBER; - backflag = BACK_NONE; + // Determine if a new state should be entered. + if (sc.state == SCE_PL_DEFAULT) { + if (IsADigit(sc.ch) || + (IsADigit(sc.chNext) && (sc.ch == '.' 
|| sc.ch == 'v'))) { + sc.SetState(SCE_PL_NUMBER); + backFlag = BACK_NONE; numState = PERLNUM_DECIMAL; dotCount = 0; - if (ch == '0') { // hex,bin,octal - if (chNext == 'x') { + if (sc.ch == '0') { // hex,bin,octal + if (sc.chNext == 'x') { numState = PERLNUM_HEX; - } else if (chNext == 'b') { - numState = PERLNUM_BINARY; - } else if (isascii(chNext) && isdigit(chNext)) { - numState = PERLNUM_OCTAL; - } - if (numState != PERLNUM_DECIMAL) { - i++; - ch = chNext; - chNext = chNext2; - } - } else if (ch == 'v') { // vector + } else if (sc.chNext == 'b') { + numState = PERLNUM_BINARY; + } else if (IsADigit(sc.chNext)) { + numState = PERLNUM_OCTAL; + } + if (numState != PERLNUM_DECIMAL) { + sc.Forward(); + } + } else if (sc.ch == 'v') { // vector numState = PERLNUM_V_VECTOR; } - } else if (isWordStart(ch)) { - // if immediately prefixed by '::', always a bareword - state = SCE_PL_WORD; - if (chPrev == ':' && styler.SafeGetCharAt(i - 2) == ':') { - state = SCE_PL_IDENTIFIER; - } - unsigned int kw = i + 1; - // first check for possible quote-like delimiter - if (ch == 's' && !isNonQuote(chNext)) { - state = SCE_PL_REGSUBST; + } else if (setWord.Contains(sc.ch)) { + // if immediately prefixed by '::', always a bareword + sc.SetState(SCE_PL_WORD); + if (sc.chPrev == ':' && sc.GetRelative(-2) == ':') { + sc.ChangeState(SCE_PL_IDENTIFIER); + } + unsigned int bk = sc.currentPos; + unsigned int fw = sc.currentPos + 1; + // first check for possible quote-like delimiter + if (sc.ch == 's' && !setWord.Contains(sc.chNext)) { + sc.ChangeState(SCE_PL_REGSUBST); Quote.New(2); - } else if (ch == 'm' && !isNonQuote(chNext)) { - state = SCE_PL_REGEX; - Quote.New(1); - } else if (ch == 'q' && !isNonQuote(chNext)) { - state = SCE_PL_STRING_Q; - Quote.New(1); - } else if (ch == 'y' && !isNonQuote(chNext)) { - state = SCE_PL_REGSUBST; + } else if (sc.ch == 'm' && !setWord.Contains(sc.chNext)) { + sc.ChangeState(SCE_PL_REGEX); + Quote.New(); + } else if (sc.ch == 'q' && 
!setWord.Contains(sc.chNext)) { + sc.ChangeState(SCE_PL_STRING_Q); + Quote.New(); + } else if (sc.ch == 'y' && !setWord.Contains(sc.chNext)) { + sc.ChangeState(SCE_PL_REGSUBST); Quote.New(2); - } else if (ch == 't' && chNext == 'r' && !isNonQuote(chNext2)) { - state = SCE_PL_REGSUBST; + } else if (sc.Match('t', 'r') && !setWord.Contains(sc.GetRelative(2))) { + sc.ChangeState(SCE_PL_REGSUBST); Quote.New(2); - kw++; - } else if (ch == 'q' && (chNext == 'q' || chNext == 'r' || chNext == 'w' || chNext == 'x') && !isNonQuote(chNext2)) { - if (chNext == 'q') state = SCE_PL_STRING_QQ; - else if (chNext == 'x') state = SCE_PL_STRING_QX; - else if (chNext == 'r') state = SCE_PL_STRING_QR; - else if (chNext == 'w') state = SCE_PL_STRING_QW; - Quote.New(1); - kw++; - } else if (ch == 'x' && (chNext == '=' || // repetition - !isWordStart(chNext) || - (isdigit(chPrev) && isdigit(chNext)))) { - state = SCE_PL_OPERATOR; - } - // if potentially a keyword, scan forward and grab word, then check - // if it's really one; if yes, disambiguation test is performed - // otherwise it is always a bareword and we skip a lot of scanning - // note: keywords assumed to be limited to [_a-zA-Z] only - if (state == SCE_PL_WORD) { - while (isWordStart(styler.SafeGetCharAt(kw))) kw++; - if (!isPerlKeyword(styler.GetStartSegment(), kw, keywords, styler)) { - state = SCE_PL_IDENTIFIER; - } - } - // if already SCE_PL_IDENTIFIER, then no ambiguity, skip this - // for quote-like delimiters/keywords, attempt to disambiguate - // to select for bareword, change state -> SCE_PL_IDENTIFIER - if (state != SCE_PL_IDENTIFIER && i > 0) { - unsigned int j = i; - bool moreback = false; // true if passed newline/comments - bool brace = false; // true if opening brace found - char ch2; - // first look backwards past whitespace/comments for EOLs - // if BACK_NONE, neither operator nor keyword, so skip test - if (backflag != BACK_NONE) { - while (--j > backPos) { - if (isEOLChar(styler.SafeGetCharAt(j))) - moreback = 
true; - } - ch2 = styler.SafeGetCharAt(j); - if (ch2 == '{' && !moreback) { - // {bareword: possible variable spec - brace = true; - } else if ((ch2 == '&' && styler.SafeGetCharAt(j - 1) != '&') - // &bareword: subroutine call - || (ch2 == '>' && styler.SafeGetCharAt(j - 1) == '-') - // ->bareword: part of variable spec - || (ch2 == 'b' && styler.Match(j - 2, "su"))) { - // sub bareword: subroutine declaration - // (implied BACK_KEYWORD, no keywords end in 'sub'!) - state = SCE_PL_IDENTIFIER; - } - // if status still ambiguous, look forward after word past - // tabs/spaces only; if ch2 isn't one of '[{(,' it can never - // match anything, so skip the whole thing - j = kw; - if (state != SCE_PL_IDENTIFIER - && (ch2 == '{' || ch2 == '(' || ch2 == '['|| ch2 == ',') - && kw < lengthDoc) { - while (ch2 = styler.SafeGetCharAt(j), - (ch2 == ' ' || ch2 == '\t') && j < lengthDoc) { - j++; - } - if ((ch2 == '}' && brace) - // {bareword}: variable spec - || (ch2 == '=' && styler.SafeGetCharAt(j + 1) == '>')) { - // [{(, bareword=>: hash literal - state = SCE_PL_IDENTIFIER; - } - } - } - } - backflag = BACK_NONE; - // an identifier or bareword - if (state == SCE_PL_IDENTIFIER) { - if ((!isWordStart(chNext) && chNext != '\'') - || (chNext == '.' && chNext2 == '.')) { - // We need that if length of word == 1! - // This test is copied from the SCE_PL_WORD handler. 
- styler.ColourTo(i, SCE_PL_IDENTIFIER); - state = SCE_PL_DEFAULT; - } - // a keyword - } else if (state == SCE_PL_WORD) { - i = kw - 1; - if (ch == '_' && chNext == '_' && - (isMatch(styler, lengthDoc, styler.GetStartSegment(), "__DATA__") - || isMatch(styler, lengthDoc, styler.GetStartSegment(), "__END__"))) { - styler.ColourTo(i, SCE_PL_DATASECTION); - state = SCE_PL_DATASECTION; - } else { - if (isMatch(styler, lengthDoc, styler.GetStartSegment(), "format")) { - state = SCE_PL_FORMAT_IDENT; - HereDoc.State = 0; - } else { - state = SCE_PL_DEFAULT; - } - styler.ColourTo(i, SCE_PL_WORD); - backflag = BACK_KEYWORD; - backPos = i; - } - ch = styler.SafeGetCharAt(i); - chNext = styler.SafeGetCharAt(i + 1); - // a repetition operator 'x' - } else if (state == SCE_PL_OPERATOR) { - state = SCE_PL_DEFAULT; - goto handleOperator; - // quote-like delimiter, skip one char if double-char delimiter - } else { - i = kw - 1; - chNext = styler.SafeGetCharAt(i + 1); - } - } else if (ch == '#') { - state = SCE_PL_COMMENTLINE; - } else if (ch == '\"') { - state = SCE_PL_STRING; - Quote.New(1); - Quote.Open(ch); - backflag = BACK_NONE; - } else if (ch == '\'') { - if (chPrev == '&') { + sc.Forward(); + fw++; + } else if (sc.ch == 'q' && setQDelim.Contains(sc.chNext) + && !setWord.Contains(sc.GetRelative(2))) { + if (sc.chNext == 'q') sc.ChangeState(SCE_PL_STRING_QQ); + else if (sc.chNext == 'x') sc.ChangeState(SCE_PL_STRING_QX); + else if (sc.chNext == 'r') sc.ChangeState(SCE_PL_STRING_QR); + else sc.ChangeState(SCE_PL_STRING_QW); // sc.chNext == 'w' + Quote.New(); + sc.Forward(); + fw++; + } else if (sc.ch == 'x' && (sc.chNext == '=' || // repetition + !setWord.Contains(sc.chNext) || + (IsADigit(sc.chPrev) && IsADigit(sc.chNext)))) { + sc.ChangeState(SCE_PL_OPERATOR); + } + // if potentially a keyword, scan forward and grab word, then check + // if it's really one; if yes, disambiguation test is performed + // otherwise it is always a bareword and we skip a lot of scanning + if 
(sc.state == SCE_PL_WORD) { + while (setWord.Contains(static_cast(styler.SafeGetCharAt(fw)))) + fw++; + if (!isPerlKeyword(styler.GetStartSegment(), fw, keywords, styler)) { + sc.ChangeState(SCE_PL_IDENTIFIER); + } + } + // if already SCE_PL_IDENTIFIER, then no ambiguity, skip this + // for quote-like delimiters/keywords, attempt to disambiguate + // to select for bareword, change state -> SCE_PL_IDENTIFIER + if (sc.state != SCE_PL_IDENTIFIER && bk > 0) { + if (disambiguateBareword(styler, bk, fw, backFlag, backPos, endPos)) + sc.ChangeState(SCE_PL_IDENTIFIER); + } + backFlag = BACK_NONE; + } else if (sc.ch == '#') { + sc.SetState(SCE_PL_COMMENTLINE); + } else if (sc.ch == '\"') { + sc.SetState(SCE_PL_STRING); + Quote.New(); + Quote.Open(sc.ch); + backFlag = BACK_NONE; + } else if (sc.ch == '\'') { + if (sc.chPrev == '&' && setWordStart.Contains(sc.chNext)) { // Archaic call - styler.ColourTo(i, state); + sc.SetState(SCE_PL_IDENTIFIER); } else { - state = SCE_PL_CHARACTER; - Quote.New(1); - Quote.Open(ch); + sc.SetState(SCE_PL_CHARACTER); + Quote.New(); + Quote.Open(sc.ch); } - backflag = BACK_NONE; - } else if (ch == '`') { - state = SCE_PL_BACKTICKS; - Quote.New(1); - Quote.Open(ch); - backflag = BACK_NONE; - } else if (ch == '$') { - if ((chNext == '{') || isspacechar(chNext)) { - styler.ColourTo(i, SCE_PL_SCALAR); + backFlag = BACK_NONE; + } else if (sc.ch == '`') { + sc.SetState(SCE_PL_BACKTICKS); + Quote.New(); + Quote.Open(sc.ch); + backFlag = BACK_NONE; + } else if (sc.ch == '$') { + sc.SetState(SCE_PL_SCALAR); + if (sc.chNext == '{') { + sc.ForwardSetState(SCE_PL_OPERATOR); + } else if (IsASpace(sc.chNext)) { + sc.ForwardSetState(SCE_PL_DEFAULT); } else { - state = SCE_PL_SCALAR; - if ((chNext == '`' && chNext2 == '`') - || (chNext == ':' && chNext2 == ':')) { - i += 2; - ch = styler.SafeGetCharAt(i); - chNext = styler.SafeGetCharAt(i + 1); - } else { - i++; - ch = chNext; - chNext = chNext2; + sc.Forward(); + if (sc.Match('`', '`') || sc.Match(':', ':')) 
{ + sc.Forward(); } } - backflag = BACK_NONE; - } else if (ch == '@') { - if (!isascii(chNext) || isalpha(chNext) || chNext == '#' || chNext == '$' - || chNext == '_' || chNext == '+' || chNext == '-') { - state = SCE_PL_ARRAY; - } else if (chNext == ':' && chNext2 == ':') { - state = SCE_PL_ARRAY; - i += 2; - ch = styler.SafeGetCharAt(i); - chNext = styler.SafeGetCharAt(i + 1); - } else if (chNext != '{' && chNext != '[') { - styler.ColourTo(i, SCE_PL_ARRAY); + backFlag = BACK_NONE; + } else if (sc.ch == '@') { + sc.SetState(SCE_PL_ARRAY); + if (setArray.Contains(sc.chNext)) { + // no special treatment + } else if (sc.chNext == ':' && sc.GetRelative(2) == ':') { + sc.Forward(2); + } else if (sc.chNext == '{' || sc.chNext == '[') { + sc.ForwardSetState(SCE_PL_OPERATOR); } else { - styler.ColourTo(i, SCE_PL_ARRAY); + sc.ChangeState(SCE_PL_OPERATOR); } - backflag = BACK_NONE; - } else if (ch == '%') { - backflag = BACK_NONE; - if (!isascii(chNext) || isalpha(chNext) || chNext == '#' || chNext == '$' - || chNext == '_' || chNext == '!' 
|| chNext == '^') { - state = SCE_PL_HASH; - i++; - ch = chNext; - chNext = chNext2; - } else if (chNext == ':' && chNext2 == ':') { - state = SCE_PL_HASH; - i += 2; - ch = styler.SafeGetCharAt(i); - chNext = styler.SafeGetCharAt(i + 1); - } else if (chNext == '{') { - styler.ColourTo(i, SCE_PL_HASH); - } else { - goto handleOperator; - } - } else if (ch == '*') { - backflag = BACK_NONE; - char strch[2]; - strch[0] = chNext; - strch[1] = '\0'; - if (chNext == ':' && chNext2 == ':') { - state = SCE_PL_SYMBOLTABLE; - i += 2; - ch = styler.SafeGetCharAt(i); - chNext = styler.SafeGetCharAt(i + 1); - } else if (!isascii(chNext) || isalpha(chNext) || chNext == '_' - || NULL != strstr("^/|,\\\";#%^:?<>)[]", strch)) { - state = SCE_PL_SYMBOLTABLE; - i++; - ch = chNext; - chNext = chNext2; - } else if (chNext == '{') { - styler.ColourTo(i, SCE_PL_SYMBOLTABLE); - } else { - if (chNext == '*') { // exponentiation - i++; - ch = chNext; - chNext = chNext2; - } - goto handleOperator; - } - } else if (ch == '/' || (ch == '<' && chNext == '<')) { + backFlag = BACK_NONE; + } else if (setPreferRE.Contains(sc.ch)) { // Explicit backward peeking to set a consistent preferRE for // any slash found, so no longer need to track preferRE state. // Find first previous significant lexed element and interpret. - // Test for HERE doc start '<<' shares this code, helps to - // determine if it should be an operator. + // A few symbols shares this code for disambiguation. bool preferRE = false; - bool isHereDoc = (ch == '<'); - bool hereDocSpace = false; // these are for corner case: - bool hereDocScalar = false; // SCALAR [whitespace] '<<' - unsigned int bk = (i > 0)? i - 1: 0; - unsigned int bkend; - char bkch; + bool isHereDoc = sc.Match('<', '<'); + bool hereDocSpace = false; // for: SCALAR [whitespace] '<<' + unsigned int bk = (sc.currentPos > 0) ? 
sc.currentPos - 1: 0; + unsigned int bkend; styler.Flush(); - if (styler.StyleAt(bk) == SCE_PL_DEFAULT) - hereDocSpace = true; - while ((bk > 0) && (styler.StyleAt(bk) == SCE_PL_DEFAULT || - styler.StyleAt(bk) == SCE_PL_COMMENTLINE)) { - bk--; - } + if (styler.StyleAt(bk) == SCE_PL_DEFAULT) + hereDocSpace = true; + skipWhitespaceComment(styler, bk); if (bk == 0) { - // position 0 won't really be checked; rarely happens - // hard to fix due to an unsigned index i + // avoid backward scanning breakage preferRE = true; } else { int bkstyle = styler.StyleAt(bk); - bkch = styler.SafeGetCharAt(bk); + int bkch = static_cast(styler.SafeGetCharAt(bk)); switch(bkstyle) { case SCE_PL_OPERATOR: preferRE = true; if (bkch == ')' || bkch == ']') { preferRE = false; } else if (bkch == '}') { - // backtrack further, count balanced brace pairs - // if a brace pair found, see if it's a variable - int braceCount = 1; - while (--bk > 0) { - bkstyle = styler.StyleAt(bk); - if (bkstyle == SCE_PL_OPERATOR) { - bkch = styler.SafeGetCharAt(bk); - if (bkch == ';') { // early out - break; - } else if (bkch == '}') { - braceCount++; - } else if (bkch == '{') { - if (--braceCount == 0) - break; - } - } - } - if (bk == 0) { - // at beginning, true - } else if (braceCount == 0) { - // balanced { found, bk>0, skip more whitespace - if (styler.StyleAt(--bk) == SCE_PL_DEFAULT) { - while (bk > 0) { - bkstyle = styler.StyleAt(--bk); - if (bkstyle != SCE_PL_DEFAULT) - break; - } - } - bkstyle = styler.StyleAt(bk); - if (bkstyle == SCE_PL_SCALAR - || bkstyle == SCE_PL_ARRAY - || bkstyle == SCE_PL_HASH - || bkstyle == SCE_PL_SYMBOLTABLE - || bkstyle == SCE_PL_OPERATOR) { - preferRE = false; - } + // backtrack by counting balanced brace pairs + // needed to test for variables like ${}, @{} etc. 
+ bkstyle = styleBeforeBracePair(styler, bk); + if (bkstyle == SCE_PL_SCALAR + || bkstyle == SCE_PL_ARRAY + || bkstyle == SCE_PL_HASH + || bkstyle == SCE_PL_SYMBOLTABLE + || bkstyle == SCE_PL_OPERATOR) { + preferRE = false; } + } else if (bkch == '+' || bkch == '-') { + if (bkch == static_cast(styler.SafeGetCharAt(bk - 1)) + && bkch != static_cast(styler.SafeGetCharAt(bk - 2))) + // exceptions for operators: unary suffixes ++, -- + preferRE = false; } break; case SCE_PL_IDENTIFIER: preferRE = true; - if (bkch == '>') { // inputsymbol + bkstyle = styleCheckIdentifier(styler, bk); + if ((bkstyle == 1) || (bkstyle == 2)) { + // inputsymbol or var with "->" or "::" before identifier preferRE = false; - break; - } - // backtrack to find "->" or "::" before identifier - while (bk > 0 && styler.StyleAt(bk) == SCE_PL_IDENTIFIER) { - bk--; - } - while (bk > 0) { - bkstyle = styler.StyleAt(bk); - if (bkstyle == SCE_PL_DEFAULT || - bkstyle == SCE_PL_COMMENTLINE) { - } else if (bkstyle == SCE_PL_OPERATOR) { - bkch = styler.SafeGetCharAt(bk); - // test for "->" and "::" - if ((bkch == '>' && styler.SafeGetCharAt(bk - 1) == '-') - || (bkch == ':' && styler.SafeGetCharAt(bk - 1) == ':')) { - preferRE = false; - break; - } - } else { - // bare identifier, if '/', /PATTERN/ unless digit/space immediately after '/' - // if '//', always expect defined-or operator to follow identifier - if (!isHereDoc && - (isspacechar(chNext) || isdigit(chNext) || chNext == '/')) - preferRE = false; - // HERE docs cannot have a space after the >> - if (isspacechar(chNext)) - preferRE = false; - break; + } else if (bkstyle == 3) { + // bare identifier, test cases follows: + if (sc.ch == '/') { + // if '/', /PATTERN/ unless digit/space immediately after '/' + // if '//', always expect defined-or operator to follow identifier + if (IsASpace(sc.chNext) || IsADigit(sc.chNext) || sc.chNext == '/') + preferRE = false; + } else if (sc.ch == '*' || sc.ch == '%') { + if (IsASpace(sc.chNext) || 
IsADigit(sc.chNext) || sc.Match('*', '*')) + preferRE = false; + } else if (sc.ch == '<') { + if (IsASpace(sc.chNext) || sc.chNext == '=') + preferRE = false; } - bk--; } break; - case SCE_PL_SCALAR: // for $var<< case - hereDocScalar = true; - break; - // for HERE docs, always true for preferRE + case SCE_PL_SCALAR: // for $var<< case: + if (isHereDoc && hereDocSpace) // if SCALAR whitespace '<<', *always* a HERE doc + preferRE = true; + break; case SCE_PL_WORD: - preferRE = true; - if (isHereDoc) - break; - // adopt heuristics similar to vim-style rules: - // keywords always forced as /PATTERN/: split, if, elsif, while - // everything else /PATTERN/ unless digit/space immediately after '/' - // for '//', defined-or favoured unless special keywords - bkend = bk + 1; - while (bk > 0 && styler.StyleAt(bk-1) == SCE_PL_WORD) { - bk--; + preferRE = true; + // for HERE docs, always true + if (sc.ch == '/') { + // adopt heuristics similar to vim-style rules: + // keywords always forced as /PATTERN/: split, if, elsif, while + // everything else /PATTERN/ unless digit/space immediately after '/' + // for '//', defined-or favoured unless special keywords + bkend = bk + 1; + while (bk > 0 && styler.StyleAt(bk - 1) == SCE_PL_WORD) { + bk--; + } + if (isPerlKeyword(bk, bkend, reWords, styler)) + break; + if (IsASpace(sc.chNext) || IsADigit(sc.chNext) || sc.chNext == '/') + preferRE = false; + } else if (sc.ch == '*' || sc.ch == '%') { + if (IsASpace(sc.chNext) || IsADigit(sc.chNext) || sc.Match('*', '*')) + preferRE = false; + } else if (sc.ch == '<') { + if (IsASpace(sc.chNext) || sc.chNext == '=') + preferRE = false; } - if (isPerlKeyword(bk, bkend, reWords, styler)) - break; - if (isspacechar(chNext) || isdigit(chNext) || chNext == '/') - preferRE = false; - break; + break; // other styles uses the default, preferRE=false case SCE_PL_POD: - case SCE_PL_POD_VERB: case SCE_PL_HERE_Q: case SCE_PL_HERE_QQ: case SCE_PL_HERE_QX: @@ -709,555 +1054,114 @@ static void 
ColourisePerlDoc(unsigned int startPos, int length, int initStyle, break; } } - backflag = BACK_NONE; - if (isHereDoc) { // handle HERE doc - // if SCALAR whitespace '<<', *always* a HERE doc - if (preferRE || (hereDocSpace && hereDocScalar)) { - state = SCE_PL_HERE_DELIM; - HereDoc.State = 0; - } else { // << operator - i++; - ch = chNext; - chNext = chNext2; - goto handleOperator; - } - } else { // handle regexp - if (preferRE) { - state = SCE_PL_REGEX; - Quote.New(1); - Quote.Open(ch); - } else { // / and // operators - if (chNext == '/') { - i++; - ch = chNext; - chNext = chNext2; - } - goto handleOperator; - } - } - } else if (ch == '<') { - // looks forward for matching > on same line - unsigned int fw = i + 1; - while (fw < lengthDoc) { - char fwch = styler.SafeGetCharAt(fw); - if (fwch == ' ') { - if (styler.SafeGetCharAt(fw-1) != '\\' || - styler.SafeGetCharAt(fw-2) != '\\') - goto handleOperator; - } else if (isEOLChar(fwch) || isspacechar(fwch)) { - goto handleOperator; - } else if (fwch == '>') { - if ((fw - i) == 2 && // '<=>' case - styler.SafeGetCharAt(fw-1) == '=') { - goto handleOperator; - } - styler.ColourTo(fw, SCE_PL_IDENTIFIER); - i = fw; - ch = fwch; - chNext = styler.SafeGetCharAt(i+1); + backFlag = BACK_NONE; + if (isHereDoc) { // handle '<<', HERE doc + if (preferRE) { + sc.SetState(SCE_PL_HERE_DELIM); + HereDoc.State = 0; + } else { // << operator + sc.SetState(SCE_PL_OPERATOR); + sc.Forward(); } - fw++; - } - if (fw == lengthDoc) - goto handleOperator; - } else if (ch == '=' // POD - && isalpha(chNext) - && (isEOLChar(chPrev))) { - state = SCE_PL_POD; - backflag = BACK_NONE; - //sookedpos = 0; - //sooked[sookedpos] = '\0'; - } else if (ch == '-' // file test operators - && isSingleCharOp(chNext) - && !isalnum((chNext2 = styler.SafeGetCharAt(i+2)))) { - styler.ColourTo(i + 1, SCE_PL_WORD); - state = SCE_PL_DEFAULT; - i++; - ch = chNext; - chNext = chNext2; - backflag = BACK_NONE; - } else if (ch == '-' // bareword promotion (-FOO cases) - 
&& ((isascii(chNext) && isalpha(chNext)) || chNext == '_') - && backflag != BACK_NONE) { - state = SCE_PL_IDENTIFIER; - backflag = BACK_NONE; - } else if (ch == '(' && i > 0) { - // backtrack to identify if we're starting a sub prototype - // for generality, we need to ignore whitespace/comments - unsigned int bk = i - 1; // i > 0 tested above - styler.Flush(); - while (bk > 0 && (styler.StyleAt(bk) == SCE_PL_DEFAULT || - styler.StyleAt(bk) == SCE_PL_COMMENTLINE)) { - bk--; - } - if (bk == 0 || styler.StyleAt(bk) != SCE_PL_IDENTIFIER) // check identifier - goto handleOperator; - while (bk > 0 && (styler.StyleAt(bk) == SCE_PL_IDENTIFIER)) { - bk--; - } - while (bk > 0 && (styler.StyleAt(bk) == SCE_PL_DEFAULT || - styler.StyleAt(bk) == SCE_PL_COMMENTLINE)) { - bk--; - } - if (bk < 2 || styler.StyleAt(bk) != SCE_PL_WORD // check "sub" keyword - || !styler.Match(bk - 2, "sub")) // assume suffix is unique! - goto handleOperator; - state = SCE_PL_SUB_PROTOTYPE; - backflag = BACK_NONE; - backPos = i; // needed for restart - } else if (isPerlOperator(ch)) { - if (ch == '.' && chNext == '.') { // .. and ... 
- i++; - if (chNext2 == '.') { i++; } - state = SCE_PL_DEFAULT; - ch = styler.SafeGetCharAt(i); - chNext = styler.SafeGetCharAt(i + 1); - } - handleOperator: - styler.ColourTo(i, SCE_PL_OPERATOR); - backflag = BACK_OPERATOR; - backPos = i; - } else if (ch == 4 || ch == 26) { // ^D and ^Z ends valid perl source - styler.ColourTo(i, SCE_PL_DATASECTION); - state = SCE_PL_DATASECTION; - } else { - // keep colouring defaults to make restart easier - styler.ColourTo(i, SCE_PL_DEFAULT); - } - } else if (state == SCE_PL_NUMBER) { - if (ch == '.') { - if (chNext == '.') { - // double dot is always an operator - goto numAtEnd; - } else if (numState <= PERLNUM_FLOAT) { - // non-decimal number or float exponent, consume next dot - styler.ColourTo(i - 1, SCE_PL_NUMBER); - state = SCE_PL_DEFAULT; - goto handleOperator; - } else { // decimal or vectors allows dots - dotCount++; - if (numState == PERLNUM_DECIMAL) { - if (dotCount > 1) { - if (isdigit(chNext)) { // really a vector - numState = PERLNUM_VECTOR; - } else // number then dot - goto numAtEnd; + } else if (sc.ch == '*') { // handle '*', typeglob + if (preferRE) { + sc.SetState(SCE_PL_SYMBOLTABLE); + if (sc.chNext == ':' && sc.GetRelative(2) == ':') { + sc.Forward(2); + } else if (sc.chNext == '{') { + sc.ForwardSetState(SCE_PL_OPERATOR); + } else { + sc.Forward(); } - } else { // vectors - if (!isdigit(chNext)) // vector then dot - goto numAtEnd; + } else { + sc.SetState(SCE_PL_OPERATOR); + if (sc.chNext == '*') // exponentiation + sc.Forward(); } - } - } else if (ch == '_') { - // permissive underscoring for number and vector literals - } else if (!isascii(ch) || isalnum(ch)) { - if (numState == PERLNUM_VECTOR || numState == PERLNUM_V_VECTOR) { - if (!isascii(ch) || isalpha(ch)) { - if (dotCount == 0) { // change to word - state = SCE_PL_IDENTIFIER; - } else { // vector then word - goto numAtEnd; + } else if (sc.ch == '%') { // handle '%', hash + if (preferRE) { + sc.SetState(SCE_PL_HASH); + if 
(setHash.Contains(sc.chNext)) { + sc.Forward(); + } else if (sc.chNext == ':' && sc.GetRelative(2) == ':') { + sc.Forward(2); + } else if (sc.chNext == '{') { + sc.ForwardSetState(SCE_PL_OPERATOR); + } else { + sc.ChangeState(SCE_PL_OPERATOR); + } + } else { + sc.SetState(SCE_PL_OPERATOR); + } + } else if (sc.ch == '<') { // handle '<', inputsymbol + if (preferRE) { + // forward scan + int i = inputsymbolScan(styler, sc.currentPos, endPos); + if (i > 0) { + sc.SetState(SCE_PL_IDENTIFIER); + sc.Forward(i); + } else { + sc.SetState(SCE_PL_OPERATOR); + } + } else { + sc.SetState(SCE_PL_OPERATOR); + } + } else { // handle '/', regexp + if (preferRE) { + sc.SetState(SCE_PL_REGEX); + Quote.New(); + Quote.Open(sc.ch); + } else { // / and // operators + sc.SetState(SCE_PL_OPERATOR); + if (sc.chNext == '/') { + sc.Forward(); } } - } else if (numState == PERLNUM_DECIMAL) { - if (ch == 'E' || ch == 'e') { // exponent - numState = PERLNUM_FLOAT; - if (chNext == '+' || chNext == '-') { - i++; - ch = chNext; - chNext = chNext2; - } - } else if (!isascii(ch) || !isdigit(ch)) { // number then word - goto numAtEnd; - } - } else if (numState == PERLNUM_FLOAT) { - if (!isdigit(ch)) { // float then word - goto numAtEnd; - } - } else if (numState == PERLNUM_OCTAL) { - if (!isdigit(ch)) - goto numAtEnd; - else if (ch > '7') - numState = PERLNUM_BAD; - } else if (numState == PERLNUM_BINARY) { - if (!isdigit(ch)) - goto numAtEnd; - else if (ch > '1') - numState = PERLNUM_BAD; - } else if (numState == PERLNUM_HEX) { - int ch2 = toupper(ch); - if (!isdigit(ch) && !(ch2 >= 'A' && ch2 <= 'F')) - goto numAtEnd; - } else {//(numState == PERLNUM_BAD) { - if (!isdigit(ch)) - goto numAtEnd; - } + } + } else if (sc.ch == '=' // POD + && setPOD.Contains(sc.chNext) + && sc.atLineStart) { + sc.SetState(SCE_PL_POD); + backFlag = BACK_NONE; + } else if (sc.ch == '-' && setWordStart.Contains(sc.chNext)) { // extended '-' cases + unsigned int bk = sc.currentPos; + unsigned int fw = 2; + if 
(setSingleCharOp.Contains(sc.chNext) && // file test operators + !setWord.Contains(sc.GetRelative(2))) { + sc.SetState(SCE_PL_WORD); + } else { + // nominally a minus and bareword; find extent of bareword + while (setWord.Contains(sc.GetRelative(fw))) + fw++; + sc.SetState(SCE_PL_OPERATOR); + } + // force to bareword for hash key => or {variable literal} cases + if (disambiguateBareword(styler, bk, bk + fw, backFlag, backPos, endPos) & 2) { + sc.ChangeState(SCE_PL_IDENTIFIER); + } + backFlag = BACK_NONE; + } else if (sc.ch == '(' && sc.currentPos > 0) { // '(' or subroutine prototype + if (styleCheckSubPrototype(styler, sc.currentPos - 1)) { + sc.SetState(SCE_PL_SUB_PROTOTYPE); + backFlag = BACK_NONE; + } else { + sc.SetState(SCE_PL_OPERATOR); + } + } else if (setPerlOperator.Contains(sc.ch)) { // operators + sc.SetState(SCE_PL_OPERATOR); + if (sc.Match('.', '.')) { // .. and ... + sc.Forward(); + if (sc.chNext == '.') sc.Forward(); + } + } else if (sc.ch == 4 || sc.ch == 26) { // ^D and ^Z ends valid perl source + sc.SetState(SCE_PL_DATASECTION); } else { - // complete current number or vector - numAtEnd: - styler.ColourTo(i - 1, actualNumStyle(numState)); - state = SCE_PL_DEFAULT; - goto restartLexer; + // keep colouring defaults + sc.Complete(); } - } else if (state == SCE_PL_IDENTIFIER) { - if (!isWordStart(chNext) && chNext != '\'') { - styler.ColourTo(i, SCE_PL_IDENTIFIER); - state = SCE_PL_DEFAULT; - ch = ' '; - } - } else { - if (state == SCE_PL_COMMENTLINE) { - if (isEOLChar(ch)) { - styler.ColourTo(i - 1, state); - state = SCE_PL_DEFAULT; - goto restartLexer; - } else if (isEOLChar(chNext)) { - styler.ColourTo(i, state); - state = SCE_PL_DEFAULT; - } - } else if (state == SCE_PL_HERE_DELIM) { - // - // From perldata.pod: - // ------------------ - // A line-oriented form of quoting is based on the shell ``here-doc'' - // syntax. 
- // Following a << you specify a string to terminate the quoted material, - // and all lines following the current line down to the terminating - // string are the value of the item. - // The terminating string may be either an identifier (a word), - // or some quoted text. - // If quoted, the type of quotes you use determines the treatment of - // the text, just as in regular quoting. - // An unquoted identifier works like double quotes. - // There must be no space between the << and the identifier. - // (If you put a space it will be treated as a null identifier, - // which is valid, and matches the first empty line.) - // (This is deprecated, -w warns of this syntax) - // The terminating string must appear by itself (unquoted and with no - // surrounding whitespace) on the terminating line. - // - // From Bash info: - // --------------- - // Specifier format is: <<[-]WORD - // Optional '-' is for removal of leading tabs from here-doc. - // Whitespace acceptable after <<[-] operator. 
- // - if (HereDoc.State == 0) { // '<<' encountered - bool gotspace = false; - unsigned int oldi = i; - if (chNext == ' ' || chNext == '\t') { - // skip whitespace; legal for quoted delimiters - gotspace = true; - do { - i++; - chNext = styler.SafeGetCharAt(i + 1); - } while ((i + 1 < lengthDoc) && (chNext == ' ' || chNext == '\t')); - chNext2 = styler.SafeGetCharAt(i + 2); - } - HereDoc.State = 1; - HereDoc.Quote = chNext; - HereDoc.Quoted = false; - HereDoc.DelimiterLength = 0; - HereDoc.Delimiter[HereDoc.DelimiterLength] = '\0'; - if (chNext == '\'' || chNext == '"' || chNext == '`') { - // a quoted here-doc delimiter - i++; - ch = chNext; - chNext = chNext2; - HereDoc.Quoted = true; - } else if (isspacechar(chNext) || isdigit(chNext) || chNext == '\\' - || chNext == '=' || chNext == '$' || chNext == '@' - || ((isalpha(chNext) || chNext == '_') && gotspace)) { - // left shift << or <<= operator cases - // restore position if operator - i = oldi; - styler.ColourTo(i, SCE_PL_OPERATOR); - state = SCE_PL_DEFAULT; - backflag = BACK_OPERATOR; - backPos = i; - HereDoc.State = 0; - goto restartLexer; - } else { - // an unquoted here-doc delimiter, no special handling - // (cannot be prefixed by spaces/tabs), or - // symbols terminates; deprecated zero-length delimiter - } - - } else if (HereDoc.State == 1) { // collect the delimiter - backflag = BACK_NONE; - if (HereDoc.Quoted) { // a quoted here-doc delimiter - if (ch == HereDoc.Quote) { // closing quote => end of delimiter - styler.ColourTo(i, state); - state = SCE_PL_DEFAULT; - } else { - if (ch == '\\' && chNext == HereDoc.Quote) { // escaped quote - i++; - ch = chNext; - chNext = chNext2; - } - HereDoc.Delimiter[HereDoc.DelimiterLength++] = ch; - HereDoc.Delimiter[HereDoc.DelimiterLength] = '\0'; - } - } else { // an unquoted here-doc delimiter - if (isalnum(ch) || ch == '_') { - HereDoc.Delimiter[HereDoc.DelimiterLength++] = ch; - HereDoc.Delimiter[HereDoc.DelimiterLength] = '\0'; - } else { - styler.ColourTo(i - 
1, state); - state = SCE_PL_DEFAULT; - goto restartLexer; - } - } - if (HereDoc.DelimiterLength >= HERE_DELIM_MAX - 1) { - styler.ColourTo(i - 1, state); - state = SCE_PL_ERROR; - goto restartLexer; - } - } - } else if (HereDoc.State == 2) { - // state == SCE_PL_HERE_Q || state == SCE_PL_HERE_QQ || state == SCE_PL_HERE_QX - if (isEOLChar(chPrev) && isMatch(styler, lengthDoc, i, HereDoc.Delimiter)) { - i += HereDoc.DelimiterLength; - chPrev = styler.SafeGetCharAt(i - 1); - ch = styler.SafeGetCharAt(i); - if (isEOLChar(ch)) { - styler.ColourTo(i - 1, state); - state = SCE_PL_DEFAULT; - backflag = BACK_NONE; - HereDoc.State = 0; - goto restartLexer; - } - chNext = styler.SafeGetCharAt(i + 1); - } - } else if (state == SCE_PL_POD - || state == SCE_PL_POD_VERB) { - if (isEOLChar(chPrev)) { - if (ch == ' ' || ch == '\t') { - styler.ColourTo(i - 1, state); - state = SCE_PL_POD_VERB; - } else { - styler.ColourTo(i - 1, state); - state = SCE_PL_POD; - if (ch == '=') { - if (isMatch(styler, lengthDoc, i, "=cut")) { - styler.ColourTo(i - 1 + 4, state); - i += 4; - state = SCE_PL_DEFAULT; - ch = styler.SafeGetCharAt(i); - //chNext = styler.SafeGetCharAt(i + 1); - goto restartLexer; - } - } - } - } - } else if (state == SCE_PL_SCALAR // variable names - || state == SCE_PL_ARRAY - || state == SCE_PL_HASH - || state == SCE_PL_SYMBOLTABLE) { - if (ch == ':' && chNext == ':') { // skip :: - i++; - ch = chNext; - chNext = chNext2; - } - else if (isEndVar(ch)) { - if (i == (styler.GetStartSegment() + 1)) { - // Special variable: $(, $_ etc. - styler.ColourTo(i, state); - state = SCE_PL_DEFAULT; - } else { - styler.ColourTo(i - 1, state); - state = SCE_PL_DEFAULT; - goto restartLexer; - } - } - } else if (state == SCE_PL_REGEX - || state == SCE_PL_STRING_QR - ) { - if (!Quote.Up && !isspacechar(ch)) { - Quote.Open(ch); - } else if (ch == '\\' && Quote.Up != '\\') { - // SG: Is it save to skip *every* escaped char? 
- i++; - ch = chNext; - chNext = styler.SafeGetCharAt(i + 1); - } else { - if (ch == Quote.Down /*&& chPrev != '\\'*/) { - Quote.Count--; - if (Quote.Count == 0) { - Quote.Rep--; - if (Quote.Up == Quote.Down) { - Quote.Count++; - } - } - if (!isalpha(chNext)) { - if (Quote.Rep <= 0) { - styler.ColourTo(i, state); - state = SCE_PL_DEFAULT; - ch = ' '; - } - } - } else if (ch == Quote.Up /*&& chPrev != '\\'*/) { - Quote.Count++; - } else if (!isascii(chNext) || !isalpha(chNext)) { - if (Quote.Rep <= 0) { - styler.ColourTo(i, state); - state = SCE_PL_DEFAULT; - ch = ' '; - } - } - } - } else if (state == SCE_PL_REGSUBST) { - if (!Quote.Up && !isspacechar(ch)) { - Quote.Open(ch); - } else if (ch == '\\' && Quote.Up != '\\') { - // SG: Is it save to skip *every* escaped char? - i++; - ch = chNext; - chNext = styler.SafeGetCharAt(i + 1); - } else { - if (Quote.Count == 0 && Quote.Rep == 1) { - /* We matched something like s(...) or tr{...} - * and are looking for the next matcher characters, - * which could be either bracketed ({...}) or non-bracketed - * (/.../). - * - * Number-signs are problematic. If they occur after - * the close of the first part, treat them like - * a Quote.Up char, even if they actually start comments. - * - * If we find an alnum, we end the regsubst, and punt. 
- * - * Eric Promislow ericp@activestate.com Aug 9,2000 - */ - if (isspacechar(ch)) { - // Keep going - } - else if (!isascii(ch) || isalnum(ch)) { - styler.ColourTo(i, state); - state = SCE_PL_DEFAULT; - ch = ' '; - } else { - Quote.Open(ch); - } - } else if (ch == Quote.Down /*&& chPrev != '\\'*/) { - Quote.Count--; - if (Quote.Count == 0) { - Quote.Rep--; - } - if (!isascii(chNext) || !isalpha(chNext)) { - if (Quote.Rep <= 0) { - styler.ColourTo(i, state); - state = SCE_PL_DEFAULT; - ch = ' '; - } - } - if (Quote.Up == Quote.Down) { - Quote.Count++; - } - } else if (ch == Quote.Up /*&& chPrev != '\\'*/) { - Quote.Count++; - } else if (!isascii(chNext) || !isalpha(chNext)) { - if (Quote.Rep <= 0) { - styler.ColourTo(i, state); - state = SCE_PL_DEFAULT; - ch = ' '; - } - } - } - } else if (state == SCE_PL_STRING_Q - || state == SCE_PL_STRING_QQ - || state == SCE_PL_STRING_QX - || state == SCE_PL_STRING_QW - || state == SCE_PL_STRING - || state == SCE_PL_CHARACTER - || state == SCE_PL_BACKTICKS - ) { - if (!Quote.Down && !isspacechar(ch)) { - Quote.Open(ch); - } else if (ch == '\\' && Quote.Up != '\\') { - i++; - ch = chNext; - chNext = styler.SafeGetCharAt(i + 1); - } else if (ch == Quote.Down) { - Quote.Count--; - if (Quote.Count == 0) { - Quote.Rep--; - if (Quote.Rep <= 0) { - styler.ColourTo(i, state); - state = SCE_PL_DEFAULT; - ch = ' '; - } - if (Quote.Up == Quote.Down) { - Quote.Count++; - } - } - } else if (ch == Quote.Up) { - Quote.Count++; - } - } else if (state == SCE_PL_SUB_PROTOTYPE) { - char strch[2]; - strch[0] = ch; - strch[1] = '\0'; - if (NULL != strstr("\\[$@%&*];", strch)) { - // keep going - } else if (ch == ')') { - styler.ColourTo(i, state); - state = SCE_PL_DEFAULT; - } else { - // abandon prototype, restart from '(' - i = backPos; - styler.ColourTo(i, SCE_PL_OPERATOR); - ch = styler.SafeGetCharAt(i); - chNext = styler.SafeGetCharAt(i + 1); - state = SCE_PL_DEFAULT; - } - } else if (state == SCE_PL_FORMAT_IDENT) { - // occupies different 
HereDoc states to avoid clashing with HERE docs - if (HereDoc.State == 0) { - if ((isascii(ch) && isalpha(ch)) || ch == '_' // probable identifier - || ch == '=') { // no identifier - HereDoc.State = 3; - HereDoc.Quoted = false; // whitespace flag - } else if (ch == ' ' || ch == '\t') { - styler.ColourTo(i, SCE_PL_DEFAULT); - } else { - state = SCE_PL_DEFAULT; - HereDoc.State = 0; - goto restartLexer; - } - } - if (HereDoc.State == 3) { // with just a '=', state goes 0->3->4 - if (ch == '=') { - styler.ColourTo(i, SCE_PL_FORMAT_IDENT); - state = SCE_PL_DEFAULT; - HereDoc.State = 4; - } else if (ch == ' ' || ch == '\t') { - HereDoc.Quoted = true; - } else if (isEOLChar(ch) || (HereDoc.Quoted && ch != '=')) { - // abandon format, restart from after 'format' - i = backPos + 1; - ch = styler.SafeGetCharAt(i); - chNext = styler.SafeGetCharAt(i + 1); - state = SCE_PL_DEFAULT; - HereDoc.State = 0; - } - } - } else if (state == SCE_PL_FORMAT) { - if (isEOLChar(chPrev)) { - styler.ColourTo(i - 1, state); - if (ch == '.' 
&& isEOLChar(chNext)) { - styler.ColourTo(i, state); - state = SCE_PL_DEFAULT; - } - } - } } - if (state == SCE_PL_ERROR) { - break; - } - chPrev = ch; } - styler.ColourTo(lengthDoc - 1, state); + sc.Complete(); } static bool IsCommentLine(int line, Accessor &styler) { @@ -1265,17 +1169,17 @@ static bool IsCommentLine(int line, Accessor &styler) { int eol_pos = styler.LineStart(line + 1) - 1; for (int i = pos; i < eol_pos; i++) { char ch = styler[i]; - int style = styler.StyleAt(i); + int style = styler.StyleAt(i); if (ch == '#' && style == SCE_PL_COMMENTLINE) return true; - else if (ch != ' ' && ch != '\t') + else if (!IsASpaceOrTab(ch)) return false; } return false; } static void FoldPerlDoc(unsigned int startPos, int length, int, WordList *[], - Accessor &styler) { + Accessor &styler) { bool foldComment = styler.GetPropertyInt("fold.comment") != 0; bool foldCompact = styler.GetPropertyInt("fold.compact", 1) != 0; // Custom folding of POD and packages @@ -1300,18 +1204,18 @@ static void FoldPerlDoc(unsigned int startPos, int length, int, WordList *[], int style = styleNext; styleNext = styler.StyleAt(i + 1); bool atEOL = (ch == '\r' && chNext != '\n') || (ch == '\n'); - bool atLineStart = isEOLChar(chPrev) || i == 0; - // Comment folding + bool atLineStart = ((chPrev == '\r') || (chPrev == '\n')) || i == 0; + // Comment folding if (foldComment && atEOL && IsCommentLine(lineCurrent, styler)) - { - if (!IsCommentLine(lineCurrent - 1, styler) - && IsCommentLine(lineCurrent + 1, styler)) - levelCurrent++; - else if (IsCommentLine(lineCurrent - 1, styler) - && !IsCommentLine(lineCurrent+1, styler)) - levelCurrent--; - } - if (style == SCE_C_OPERATOR) { + { + if (!IsCommentLine(lineCurrent - 1, styler) + && IsCommentLine(lineCurrent + 1, styler)) + levelCurrent++; + else if (IsCommentLine(lineCurrent - 1, styler) + && !IsCommentLine(lineCurrent+1, styler)) + levelCurrent--; + } + if (style == SCE_PL_OPERATOR) { if (ch == '{') { levelCurrent++; } else if (ch == '}') { 
@@ -1329,17 +1233,17 @@ static void FoldPerlDoc(unsigned int startPos, int length, int, WordList *[], else if (styler.Match(i, "=head")) isPodHeading = true; } else if (style == SCE_PL_DATASECTION) { - if (ch == '=' && isalpha(chNext) && levelCurrent == SC_FOLDLEVELBASE) - levelCurrent++; - else if (styler.Match(i, "=cut") && levelCurrent > SC_FOLDLEVELBASE) - levelCurrent--; - else if (styler.Match(i, "=head")) + if (ch == '=' && isalpha(chNext) && levelCurrent == SC_FOLDLEVELBASE) + levelCurrent++; + else if (styler.Match(i, "=cut") && levelCurrent > SC_FOLDLEVELBASE) + levelCurrent--; + else if (styler.Match(i, "=head")) isPodHeading = true; - // if package used or unclosed brace, level > SC_FOLDLEVELBASE! - // reset needed as level test is vs. SC_FOLDLEVELBASE - else if (styler.Match(i, "__END__")) - levelCurrent = SC_FOLDLEVELBASE; - } + // if package used or unclosed brace, level > SC_FOLDLEVELBASE! + // reset needed as level test is vs. SC_FOLDLEVELBASE + else if (styler.Match(i, "__END__")) + levelCurrent = SC_FOLDLEVELBASE; + } } // Custom package folding if (foldPackage && atLineStart) { @@ -1351,9 +1255,9 @@ static void FoldPerlDoc(unsigned int startPos, int length, int, WordList *[], if (atEOL) { int lev = levelPrev; if (isPodHeading) { - lev = levelPrev - 1; - lev |= SC_FOLDLEVELHEADERFLAG; - isPodHeading = false; + lev = levelPrev - 1; + lev |= SC_FOLDLEVELHEADERFLAG; + isPodHeading = false; } // Check if line was a package declaration // because packages need "special" treatment @@ -1362,7 +1266,7 @@ static void FoldPerlDoc(unsigned int startPos, int length, int, WordList *[], levelCurrent = SC_FOLDLEVELBASE + 1; isPackageLine = false; } - lev |= levelCurrent << 16; + lev |= levelCurrent << 16; if (visibleChars == 0 && foldCompact) lev |= SC_FOLDLEVELWHITEFLAG; if ((levelCurrent > levelPrev) && (visibleChars > 0)) @@ -1389,4 +1293,3 @@ static const char * const perlWordListDesc[] = { }; LexerModule lmPerl(SCLEX_PERL, ColourisePerlDoc, "perl", 
FoldPerlDoc, perlWordListDesc, 8); - diff --git a/scintilla/LexRuby.cxx b/scintilla/LexRuby.cxx index 18e94d4d..7cb0b95c 100644 --- a/scintilla/LexRuby.cxx +++ b/scintilla/LexRuby.cxx @@ -1641,7 +1641,7 @@ static void FoldRbDoc(unsigned int startPos, int length, int initStyle, if (foldComment && stylePrev != SCE_RB_COMMENTLINE) { if (chNext == '{') { levelCurrent++; - } else if (chNext == '}') { + } else if (chNext == '}' && levelCurrent > 0) { levelCurrent--; } } @@ -1692,6 +1692,7 @@ static void FoldRbDoc(unsigned int startPos, int length, int initStyle, visibleChars++; buffer_ends_with_eol = false; } + stylePrev = style; } // Fill in the real level of the next line, keeping the current flags as they will be filled in later if (!buffer_ends_with_eol) { diff --git a/scintilla/PlatGTK.cxx b/scintilla/PlatGTK.cxx index 725c35ca..81c0b219 100644 --- a/scintilla/PlatGTK.cxx +++ b/scintilla/PlatGTK.cxx @@ -1071,22 +1071,22 @@ void SurfaceImpl::AlphaRectangle(PRectangle rc, int cornerSize, ColourAllocated guint32 valOutline = *(reinterpret_cast(pixVal)); guint32 *pixels = reinterpret_cast(gdk_pixbuf_get_pixels(pixalpha)); int stride = gdk_pixbuf_get_rowstride(pixalpha) / 4; - for (int y=0; ypfd) { char *utfForm = 0; @@ -1271,7 +1271,7 @@ void SurfaceImpl::DrawTextBase(PRectangle rc, Font &font_, int ybase, const char } pango_layout_set_font_description(layout, PFont(font_)->pfd); PangoLayoutLine *pll = pango_layout_get_line(layout,0); - gdk_draw_layout_line(drawable, gc, x, ybase, pll); + gdk_draw_layout_line(drawable, gc, xText, ybase, pll); if (useGFree) { g_free(utfForm); } else { @@ -1302,13 +1302,13 @@ void SurfaceImpl::DrawTextBase(PRectangle rc, Font &font_, int ybase, const char draw8bit = false; wctext[wclen] = L'\0'; GdkWChar *wcp = wctext; - while ((wclen > 0) && (x < maxCoordinate)) { + while ((wclen > 0) && (xText < maxCoordinate)) { int lenDraw = Platform::Minimum(wclen, segmentLength); gdk_draw_text_wc(drawable, PFont(font_)->pfont, gc, - x, ybase, wcp, 
lenDraw); + xText, ybase, wcp, lenDraw); wclen -= lenDraw; if (wclen > 0) { // Avoid next calculation if possible as may be expensive - x += gdk_text_width_wc(PFont(font_)->pfont, + xText += gdk_text_width_wc(PFont(font_)->pfont, wcp, lenDraw); } wcp += lenDraw; @@ -1316,13 +1316,13 @@ void SurfaceImpl::DrawTextBase(PRectangle rc, Font &font_, int ybase, const char } } if (draw8bit) { - while ((len > 0) && (x < maxCoordinate)) { + while ((len > 0) && (xText < maxCoordinate)) { int lenDraw = Platform::Minimum(len, segmentLength); gdk_draw_text(drawable, PFont(font_)->pfont, gc, - x, ybase, s, lenDraw); + xText, ybase, s, lenDraw); len -= lenDraw; if (len > 0) { // Avoid next calculation if possible as may be expensive - x += gdk_text_width(PFont(font_)->pfont, s, lenDraw); + xText += gdk_text_width(PFont(font_)->pfont, s, lenDraw); } s += lenDraw; } @@ -1985,7 +1985,7 @@ public: doubleClickAction = action; doubleClickActionData = data; } - virtual void SetList(const char* list, char separator, char typesep); + virtual void SetList(const char *listText, char separator, char typesep); }; ListBox *ListBox::Allocate() { @@ -2490,12 +2490,12 @@ void ListBoxX::ClearRegisteredImages() { xset.Clear(); } -void ListBoxX::SetList(const char* list, char separator, char typesep) { +void ListBoxX::SetList(const char *listText, char separator, char typesep) { Clear(); - int count = strlen(list) + 1; + int count = strlen(listText) + 1; char *words = new char[count]; if (words) { - memcpy(words, list, count); + memcpy(words, listText, count); char *startword = words; char *numword = NULL; int i = 0; diff --git a/scintilla/RESearch.cxx b/scintilla/RESearch.cxx index b1b226a0..57c745e2 100644 --- a/scintilla/RESearch.cxx +++ b/scintilla/RESearch.cxx @@ -33,7 +33,7 @@ * Interfaces: * RESearch::Compile: compile a regular expression into a NFA. 
* - * const char *RESearch::Compile(const char *pat, int length, + * const char *RESearch::Compile(const char *pattern, int length, * bool caseSensitive, bool posix) * * Returns a short error string if they fail. @@ -347,13 +347,13 @@ static int GetHexaChar(unsigned char hd1, unsigned char hd2) { /** * Called when the parser finds a backslash not followed * by a valid expression (like \( in non-Posix mode). - * @param pat: pointer on the char after the backslash. + * @param pattern: pointer on the char after the backslash. * @param incr: (out) number of chars to skip after expression evaluation. * @return the char if it resolves to a simple char, * or -1 for a char class. In this case, bittab is changed. */ int RESearch::GetBackslashExpression( - const char *pat, + const char *pattern, int &incr) { // Since error reporting is primitive and messages are not used anyway, // I choose to interpret unexpected syntax in a logical way instead @@ -361,7 +361,7 @@ int RESearch::GetBackslashExpression( incr = 0; // Most of the time, will skip the char "naturally". 
int c; int result = -1; - unsigned char bsc = *pat; + unsigned char bsc = *pattern; if (!bsc) { // Avoid overrun result = '\\'; // \ at end of pattern, take it literally @@ -379,8 +379,8 @@ int RESearch::GetBackslashExpression( result = escapeValue(bsc); break; case 'x': { - unsigned char hd1 = *(pat + 1); - unsigned char hd2 = *(pat + 2); + unsigned char hd1 = *(pattern + 1); + unsigned char hd2 = *(pattern + 2); int hexValue = GetHexaChar(hd1, hd2); if (hexValue >= 0) { result = hexValue; @@ -436,7 +436,7 @@ int RESearch::GetBackslashExpression( return result; } -const char *RESearch::Compile(const char *pat, int length, bool caseSensitive, bool posix) { +const char *RESearch::Compile(const char *pattern, int length, bool caseSensitive, bool posix) { char *mp=nfa; /* nfa pointer */ char *lp; /* saved pointer */ char *sp=nfa; /* another one */ @@ -449,14 +449,14 @@ const char *RESearch::Compile(const char *pat, int length, bool caseSensitive, b char mask; /* xor mask -CCL/NCL */ int c1, c2, prevChar; - if (!pat || !length) + if (!pattern || !length) if (sta) return 0; else return badpat("No previous regular expression"); sta = NOP; - const char *p=pat; /* pattern pointer */ + const char *p=pattern; /* pattern pointer */ for (int i=0; i mpMax) return badpat("Pattern too long"); @@ -468,7 +468,7 @@ const char *RESearch::Compile(const char *pat, int length, bool caseSensitive, b break; case '^': /* match beginning */ - if (p == pat) + if (p == pattern) *mp++ = BOL; else { *mp++ = CHR; @@ -588,7 +588,7 @@ const char *RESearch::Compile(const char *pat, int length, bool caseSensitive, b case '*': /* match 0 or more... */ case '+': /* match 1 or more... */ - if (p == pat) + if (p == pattern) return badpat("Empty closure"); lp = sp; /* previous opcode */ if (*lp == CLO) /* equivalence... 
*/ @@ -853,6 +853,8 @@ int RESearch::PMatch(CharacterIndexer &ci, int lp, int endp, char *ap) { return NOTFOUND; break; case CCL: + if (lp >= endp) + return NOTFOUND; c = ci.CharAt(lp++); if (!isinset(ap,c)) return NOTFOUND; diff --git a/scintilla/RESearch.h b/scintilla/RESearch.h index 0944fc39..ef8c3e11 100644 --- a/scintilla/RESearch.h +++ b/scintilla/RESearch.h @@ -34,7 +34,7 @@ public: RESearch(CharClassify *charClassTable); ~RESearch(); bool GrabMatches(CharacterIndexer &ci); - const char *Compile(const char *pat, int length, bool caseSensitive, bool posix); + const char *Compile(const char *pattern, int length, bool caseSensitive, bool posix); int Execute(CharacterIndexer &ci, int lp, int endp); int Substitute(CharacterIndexer &ci, char *src, char *dst); @@ -51,7 +51,7 @@ private: void Clear(); void ChSet(unsigned char c); void ChSetWithCase(unsigned char c, bool caseSensitive); - int GetBackslashExpression(const char *pat, int &incr); + int GetBackslashExpression(const char *pattern, int &incr); int PMatch(CharacterIndexer &ci, int lp, int endp, char *ap); diff --git a/scintilla/RunStyles.h b/scintilla/RunStyles.h index f16421fd..0a333ca2 100644 --- a/scintilla/RunStyles.h +++ b/scintilla/RunStyles.h @@ -7,6 +7,9 @@ /// Styling buffer using one element for each run rather than using /// a filled buffer. 
+#ifndef RUNSTYLES_H +#define RUNSTYLES_H + #ifdef SCI_NAMESPACE namespace Scintilla { #endif @@ -39,3 +42,5 @@ public: #ifdef SCI_NAMESPACE } #endif + +#endif diff --git a/scintilla/ScintillaGTK.cxx b/scintilla/ScintillaGTK.cxx index cabc1f3f..0dd0ec9d 100644 --- a/scintilla/ScintillaGTK.cxx +++ b/scintilla/ScintillaGTK.cxx @@ -1406,10 +1406,10 @@ void ScintillaGTK::ClaimSelection() { void ScintillaGTK::GetGtkSelectionText(GtkSelectionData *selectionData, SelectionText &selText) { char *data = reinterpret_cast(selectionData->data); int len = selectionData->length; - GdkAtom selectionType = selectionData->type; + GdkAtom selectionTypeData = selectionData->type; // Return empty string if selection is not a string - if ((selectionType != GDK_TARGET_STRING) && (selectionType != atomUTF8)) { + if ((selectionTypeData != GDK_TARGET_STRING) && (selectionTypeData != atomUTF8)) { char *empty = new char[1]; empty[0] = '\0'; selText.Set(empty, 0, SC_CP_UTF8, 0, false, false); @@ -1425,7 +1425,7 @@ void ScintillaGTK::GetGtkSelectionText(GtkSelectionData *selectionData, Selectio #endif char *dest; - if (selectionType == GDK_TARGET_STRING) { + if (selectionTypeData == GDK_TARGET_STRING) { dest = Document::TransformLineEnds(&len, data, len, pdoc->eolMode); if (IsUnicodeMode()) { // Unknown encoding so assume in Latin1 diff --git a/scintilla/SplitVector.h b/scintilla/SplitVector.h index 9d62aef7..76f9a8f3 100644 --- a/scintilla/SplitVector.h +++ b/scintilla/SplitVector.h @@ -238,6 +238,12 @@ public: DeleteRange(0, lengthBody); } + T* BufferPointer() { + RoomFor(1); + GapTo(lengthBody); + body[lengthBody] = 0; + return body; + } }; #endif diff --git a/scintilla/include/SciLexer.h b/scintilla/include/SciLexer.h index 7fc8a6cf..44974085 100644 --- a/scintilla/include/SciLexer.h +++ b/scintilla/include/SciLexer.h @@ -454,6 +454,7 @@ #define SCE_DIFF_POSITION 4 #define SCE_DIFF_DELETED 5 #define SCE_DIFF_ADDED 6 +#define SCE_DIFF_CHANGED 7 #define SCE_CONF_DEFAULT 0 #define 
SCE_CONF_COMMENT 1 #define SCE_CONF_NUMBER 2 @@ -618,6 +619,11 @@ #define SCE_CSS_SINGLESTRING 14 #define SCE_CSS_IDENTIFIER2 15 #define SCE_CSS_ATTRIBUTE 16 +#define SCE_CSS_IDENTIFIER3 17 +#define SCE_CSS_PSEUDOELEMENT 18 +#define SCE_CSS_EXTENDED_IDENTIFIER 19 +#define SCE_CSS_EXTENDED_PSEUDOCLASS 20 +#define SCE_CSS_EXTENDED_PSEUDOELEMENT 21 #define SCE_POV_DEFAULT 0 #define SCE_POV_COMMENT 1 #define SCE_POV_COMMENTLINE 2 @@ -1235,6 +1241,27 @@ #define SCE_POWERSHELL_KEYWORD 8 #define SCE_POWERSHELL_CMDLET 9 #define SCE_POWERSHELL_ALIAS 10 +#define SCE_MYSQL_DEFAULT 0 +#define SCE_MYSQL_COMMENT 1 +#define SCE_MYSQL_COMMENTLINE 2 +#define SCE_MYSQL_VARIABLE 3 +#define SCE_MYSQL_SYSTEMVARIABLE 4 +#define SCE_MYSQL_KNOWNSYSTEMVARIABLE 5 +#define SCE_MYSQL_NUMBER 6 +#define SCE_MYSQL_MAJORKEYWORD 7 +#define SCE_MYSQL_KEYWORD 8 +#define SCE_MYSQL_DATABASEOBJECT 9 +#define SCE_MYSQL_PROCEDUREKEYWORD 10 +#define SCE_MYSQL_STRING 11 +#define SCE_MYSQL_SQSTRING 12 +#define SCE_MYSQL_DQSTRING 13 +#define SCE_MYSQL_OPERATOR 14 +#define SCE_MYSQL_FUNCTION 15 +#define SCE_MYSQL_IDENTIFIER 16 +#define SCE_MYSQL_QUOTEDIDENTIFIER 17 +#define SCE_MYSQL_USER1 18 +#define SCE_MYSQL_USER2 19 +#define SCE_MYSQL_USER3 20 #define SCE_PO_DEFAULT 0 #define SCE_PO_COMMENT 1 #define SCE_PO_MSGID 2 diff --git a/scintilla/include/Scintilla.h b/scintilla/include/Scintilla.h index a0486e50..ed0d3d35 100644 --- a/scintilla/include/Scintilla.h +++ b/scintilla/include/Scintilla.h @@ -668,6 +668,9 @@ typedef sptr_t (*SciFnDirect)(sptr_t ptr, unsigned int iMessage, uptr_t wParam, #define SCI_SETPOSITIONCACHE 2514 #define SCI_GETPOSITIONCACHE 2515 #define SCI_COPYALLOWLINE 2519 +#define SCI_GETCHARACTERPOINTER 2520 +#define SCI_SETKEYSUNICODE 2521 +#define SCI_GETKEYSUNICODE 2522 #define SCI_STARTRECORD 3001 #define SCI_STOPRECORD 3002 #define SCI_SETLEXER 4001 @@ -811,7 +814,7 @@ struct SCNotification { int length; /* SCN_MODIFIED */ int linesAdded; /* SCN_MODIFIED */ int message; /* 
SCN_MACRORECORD */ - uptr_t wParam; /* SCN_MACRORECORDv */ + uptr_t wParam; /* SCN_MACRORECORD */ sptr_t lParam; /* SCN_MACRORECORD */ int line; /* SCN_MODIFIED */ int foldLevelNow; /* SCN_MODIFIED */ diff --git a/scintilla/include/Scintilla.iface b/scintilla/include/Scintilla.iface index 7d9c4cb7..24ac63b1 100644 --- a/scintilla/include/Scintilla.iface +++ b/scintilla/include/Scintilla.iface @@ -104,7 +104,7 @@ fun void ClearAll=2004(,) # Set all style bytes to 0, remove all folding information. fun void ClearDocumentStyle=2005(,) -# Returns the number of characters in the document. +# Returns the number of bytes in the document. get int GetLength=2006(,) # Returns the character byte at the position. @@ -516,7 +516,7 @@ get int GetCaretPeriod=2075(,) set void SetCaretPeriod=2076(int periodMilliseconds,) # Set the set of characters making up words for when moving or selecting by word. -# First sets deaults like SetCharsDefault. +# First sets defaults like SetCharsDefault. set void SetWordChars=2077(, string characters) # Start a sequence of actions that is undone and redone as a unit. @@ -1536,7 +1536,7 @@ fun void ChooseCaretX=2399(,) # Set the focus to this Scintilla widget. fun void GrabFocus=2400(,) -enu CaretPolicy = CARET_ +enu CaretPolicy=CARET_ # Caret policy, used by SetXCaretPolicy and SetYCaretPolicy. # If CARET_SLOP is set, we can define a slop value: caretSlop. # This value defines an unwanted zone (UZ) where the caret is... unwanted. @@ -1800,6 +1800,16 @@ get int GetPositionCache=2515(,) # Copy the selection, if selection empty copy the line with the caret fun void CopyAllowLine=2519(,) +# Compact the document buffer and return a read-only pointer to the +# characters in the document. +get int GetCharacterPointer=2520(,) + +# Always interpret keyboard input as Unicode +set void SetKeysUnicode=2521(bool keysUnicode,) + +# Are keys always interpreted as Unicode? 
+get bool GetKeysUnicode=2522(,) + # Start notifying the container of all key presses and commands. fun void StartRecord=3001(,) @@ -1997,6 +2007,7 @@ val SCLEX_ASYMPTOTE=85 val SCLEX_R=86 val SCLEX_MAGIK=87 val SCLEX_POWERSHELL=88 +val SCLEX_OMS=89 val SCLEX_PO=90 # When a lexer specifies its language as SCLEX_AUTOMATIC it receives a @@ -2401,6 +2412,7 @@ val SCE_DIFF_HEADER=3 val SCE_DIFF_POSITION=4 val SCE_DIFF_DELETED=5 val SCE_DIFF_ADDED=6 +val SCE_DIFF_CHANGED=7 # Lexical states for SCLEX_CONF (Apache Configuration Files Lexer) lex Conf=SCLEX_CONF SCE_CONF_ val SCE_CONF_DEFAULT=0 @@ -2594,6 +2606,11 @@ val SCE_CSS_DOUBLESTRING=13 val SCE_CSS_SINGLESTRING=14 val SCE_CSS_IDENTIFIER2=15 val SCE_CSS_ATTRIBUTE=16 +val SCE_CSS_IDENTIFIER3=17 +val SCE_CSS_PSEUDOELEMENT=18 +val SCE_CSS_EXTENDED_IDENTIFIER=19 +val SCE_CSS_EXTENDED_PSEUDOCLASS=20 +val SCE_CSS_EXTENDED_PSEUDOELEMENT=21 # Lexical states for SCLEX_POV lex POV=SCLEX_POV SCE_POV_ val SCE_POV_DEFAULT=0 @@ -3295,6 +3312,29 @@ val SCE_POWERSHELL_IDENTIFIER=7 val SCE_POWERSHELL_KEYWORD=8 val SCE_POWERSHELL_CMDLET=9 val SCE_POWERSHELL_ALIAS=10 +# Lexical state for SCLEX_MYSQL +lex MySQL=SCLEX_MYSQL SCE_MYSQL_ +val SCE_MYSQL_DEFAULT=0 +val SCE_MYSQL_COMMENT=1 +val SCE_MYSQL_COMMENTLINE=2 +val SCE_MYSQL_VARIABLE=3 +val SCE_MYSQL_SYSTEMVARIABLE=4 +val SCE_MYSQL_KNOWNSYSTEMVARIABLE=5 +val SCE_MYSQL_NUMBER=6 +val SCE_MYSQL_MAJORKEYWORD=7 +val SCE_MYSQL_KEYWORD=8 +val SCE_MYSQL_DATABASEOBJECT=9 +val SCE_MYSQL_PROCEDUREKEYWORD=10 +val SCE_MYSQL_STRING=11 +val SCE_MYSQL_SQSTRING=12 +val SCE_MYSQL_DQSTRING=13 +val SCE_MYSQL_OPERATOR=14 +val SCE_MYSQL_FUNCTION=15 +val SCE_MYSQL_IDENTIFIER=16 +val SCE_MYSQL_QUOTEDIDENTIFIER=17 +val SCE_MYSQL_USER1=18 +val SCE_MYSQL_USER2=19 +val SCE_MYSQL_USER3=20 # Lexical state for SCLEX_PO lex Po=SCLEX_PO SCE_PO_ val SCE_PO_DEFAULT=0 diff --git a/src/plugindata.h b/src/plugindata.h index cb956030..a4e4adf3 100644 --- a/src/plugindata.h +++ b/src/plugindata.h @@ -41,7 +41,7 @@ enum { /** 
The Application Programming Interface (API) version, incremented * whenever any plugin data types are modified or appended to. */ - GEANY_API_VERSION = 100, + GEANY_API_VERSION = 101, /** The Application Binary Interface (ABI) version, incremented whenever * existing fields in the plugin data types have to be changed or reordered. */