geany/scintilla/lexers/LexHaskell.cxx
2015-09-20 18:39:15 +02:00

1112 lines
35 KiB
C++

/******************************************************************
* LexHaskell.cxx
*
* A haskell lexer for the scintilla code control.
* Some stuff "lended" from LexPython.cxx and LexCPP.cxx.
* External lexer stuff inspired from the caml external lexer.
* Folder copied from Python's.
*
* Written by Tobias Engvall - tumm at dtek dot chalmers dot se
*
* Several bug fixes by Krasimir Angelov - kr.angelov at gmail.com
*
* Improved by kudah <kudahkukarek@gmail.com>
*
* TODO:
* * A proper lexical folder to fold group declarations, comments, pragmas,
* #ifdefs, explicit layout, lists, tuples, quasi-quotes, splces, etc, etc,
* etc.
*
*****************************************************************/
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
#include <stdarg.h>
#include <assert.h>
#include <ctype.h>
#include <string>
#include <map>
#include "ILexer.h"
#include "Scintilla.h"
#include "SciLexer.h"
#include "PropSetSimple.h"
#include "WordList.h"
#include "LexAccessor.h"
#include "Accessor.h"
#include "StyleContext.h"
#include "CharacterSet.h"
#include "CharacterCategory.h"
#include "LexerModule.h"
#include "OptionSet.h"
#ifdef SCI_NAMESPACE
using namespace Scintilla;
#endif
// See https://github.com/ghc/ghc/blob/master/compiler/parser/Lexer.x#L1682
// Note, letter modifiers are prohibited.
static int u_iswupper (int ch) {
CharacterCategory c = CategoriseCharacter(ch);
return c == ccLu || c == ccLt;
}
static int u_iswalpha (int ch) {
CharacterCategory c = CategoriseCharacter(ch);
return c == ccLl || c == ccLu || c == ccLt || c == ccLo;
}
static int u_iswalnum (int ch) {
CharacterCategory c = CategoriseCharacter(ch);
return c == ccLl || c == ccLu || c == ccLt || c == ccLo
|| c == ccNd || c == ccNo;
}
static int u_IsHaskellSymbol(int ch) {
CharacterCategory c = CategoriseCharacter(ch);
return c == ccPc || c == ccPd || c == ccPo
|| c == ccSm || c == ccSc || c == ccSk || c == ccSo;
}
static inline bool IsHaskellLetter(const int ch) {
if (IsASCII(ch)) {
return (ch >= 'a' && ch <= 'z')
|| (ch >= 'A' && ch <= 'Z');
} else {
return u_iswalpha(ch) != 0;
}
}
static inline bool IsHaskellAlphaNumeric(const int ch) {
if (IsASCII(ch)) {
return IsAlphaNumeric(ch);
} else {
return u_iswalnum(ch) != 0;
}
}
static inline bool IsHaskellUpperCase(const int ch) {
if (IsASCII(ch)) {
return ch >= 'A' && ch <= 'Z';
} else {
return u_iswupper(ch) != 0;
}
}
static inline bool IsAnHaskellOperatorChar(const int ch) {
if (IsASCII(ch)) {
return
( ch == '!' || ch == '#' || ch == '$' || ch == '%'
|| ch == '&' || ch == '*' || ch == '+' || ch == '-'
|| ch == '.' || ch == '/' || ch == ':' || ch == '<'
|| ch == '=' || ch == '>' || ch == '?' || ch == '@'
|| ch == '^' || ch == '|' || ch == '~' || ch == '\\');
} else {
return u_IsHaskellSymbol(ch) != 0;
}
}
static inline bool IsAHaskellWordStart(const int ch) {
return IsHaskellLetter(ch) || ch == '_';
}
static inline bool IsAHaskellWordChar(const int ch) {
return ( IsHaskellAlphaNumeric(ch)
|| ch == '_'
|| ch == '\'');
}
static inline bool IsCommentBlockStyle(int style) {
return (style >= SCE_HA_COMMENTBLOCK && style <= SCE_HA_COMMENTBLOCK3);
}
static inline bool IsCommentStyle(int style) {
return (style >= SCE_HA_COMMENTLINE && style <= SCE_HA_COMMENTBLOCK3)
|| ( style == SCE_HA_LITERATE_COMMENT
|| style == SCE_HA_LITERATE_CODEDELIM);
}
// styles which do not belong to Haskell, but to external tools
static inline bool IsExternalStyle(int style) {
return ( style == SCE_HA_PREPROCESSOR
|| style == SCE_HA_LITERATE_COMMENT
|| style == SCE_HA_LITERATE_CODEDELIM);
}
static inline int CommentBlockStyleFromNestLevel(const unsigned int nestLevel) {
return SCE_HA_COMMENTBLOCK + (nestLevel % 3);
}
// Mangled version of lexlib/Accessor.cxx IndentAmount.
// Modified to treat comment blocks as whitespace
// plus special case for commentline/preprocessor.
static int HaskellIndentAmount(Accessor &styler, const Sci_Position line) {
// Determines the indentation level of the current line
// Comment blocks are treated as whitespace
Sci_Position pos = styler.LineStart(line);
Sci_Position eol_pos = styler.LineStart(line + 1) - 1;
char ch = styler[pos];
int style = styler.StyleAt(pos);
int indent = 0;
bool inPrevPrefix = line > 0;
Sci_Position posPrev = inPrevPrefix ? styler.LineStart(line-1) : 0;
while (( ch == ' ' || ch == '\t'
|| IsCommentBlockStyle(style)
|| style == SCE_HA_LITERATE_CODEDELIM)
&& (pos < eol_pos)) {
if (inPrevPrefix) {
char chPrev = styler[posPrev++];
if (chPrev != ' ' && chPrev != '\t') {
inPrevPrefix = false;
}
}
if (ch == '\t') {
indent = (indent / 8 + 1) * 8;
} else { // Space or comment block
indent++;
}
pos++;
ch = styler[pos];
style = styler.StyleAt(pos);
}
indent += SC_FOLDLEVELBASE;
// if completely empty line or the start of a comment or preprocessor...
if ( styler.LineStart(line) == styler.Length()
|| ch == ' '
|| ch == '\t'
|| ch == '\n'
|| ch == '\r'
|| IsCommentStyle(style)
|| style == SCE_HA_PREPROCESSOR)
return indent | SC_FOLDLEVELWHITEFLAG;
else
return indent;
}
struct OptionsHaskell {
bool magicHash;
bool allowQuotes;
bool implicitParams;
bool highlightSafe;
bool cpp;
bool stylingWithinPreprocessor;
bool fold;
bool foldComment;
bool foldCompact;
bool foldImports;
OptionsHaskell() {
magicHash = true; // Widespread use, enabled by default.
allowQuotes = true; // Widespread use, enabled by default.
implicitParams = false; // Fell out of favor, seldom used, disabled.
highlightSafe = true; // Moderately used, doesn't hurt to enable.
cpp = true; // Widespread use, enabled by default;
stylingWithinPreprocessor = false;
fold = false;
foldComment = false;
foldCompact = false;
foldImports = false;
}
};
static const char * const haskellWordListDesc[] = {
"Keywords",
"FFI",
"Reserved operators",
0
};
struct OptionSetHaskell : public OptionSet<OptionsHaskell> {
OptionSetHaskell() {
DefineProperty("lexer.haskell.allow.hash", &OptionsHaskell::magicHash,
"Set to 0 to disallow the '#' character at the end of identifiers and "
"literals with the haskell lexer "
"(GHC -XMagicHash extension)");
DefineProperty("lexer.haskell.allow.quotes", &OptionsHaskell::allowQuotes,
"Set to 0 to disable highlighting of Template Haskell name quotations "
"and promoted constructors "
"(GHC -XTemplateHaskell and -XDataKinds extensions)");
DefineProperty("lexer.haskell.allow.questionmark", &OptionsHaskell::implicitParams,
"Set to 1 to allow the '?' character at the start of identifiers "
"with the haskell lexer "
"(GHC & Hugs -XImplicitParams extension)");
DefineProperty("lexer.haskell.import.safe", &OptionsHaskell::highlightSafe,
"Set to 0 to disallow \"safe\" keyword in imports "
"(GHC -XSafe, -XTrustworthy, -XUnsafe extensions)");
DefineProperty("lexer.haskell.cpp", &OptionsHaskell::cpp,
"Set to 0 to disable C-preprocessor highlighting "
"(-XCPP extension)");
DefineProperty("styling.within.preprocessor", &OptionsHaskell::stylingWithinPreprocessor,
"For Haskell code, determines whether all preprocessor code is styled in the "
"preprocessor style (0, the default) or only from the initial # to the end "
"of the command word(1)."
);
DefineProperty("fold", &OptionsHaskell::fold);
DefineProperty("fold.comment", &OptionsHaskell::foldComment);
DefineProperty("fold.compact", &OptionsHaskell::foldCompact);
DefineProperty("fold.haskell.imports", &OptionsHaskell::foldImports,
"Set to 1 to enable folding of import declarations");
DefineWordListSets(haskellWordListDesc);
}
};
class LexerHaskell : public ILexer {
bool literate;
Sci_Position firstImportLine;
int firstImportIndent;
WordList keywords;
WordList ffi;
WordList reserved_operators;
OptionsHaskell options;
OptionSetHaskell osHaskell;
enum HashCount {
oneHash
,twoHashes
,unlimitedHashes
};
enum KeywordMode {
HA_MODE_DEFAULT = 0
,HA_MODE_IMPORT1 = 1 // after "import", before "qualified" or "safe" or package name or module name.
,HA_MODE_IMPORT2 = 2 // after module name, before "as" or "hiding".
,HA_MODE_IMPORT3 = 3 // after "as", before "hiding"
,HA_MODE_MODULE = 4 // after "module", before module name.
,HA_MODE_FFI = 5 // after "foreign", before FFI keywords
,HA_MODE_TYPE = 6 // after "type" or "data", before "family"
};
enum LiterateMode {
LITERATE_BIRD = 0 // if '>' is the first character on the line,
// color '>' as a codedelim and the rest of
// the line as code.
// else if "\begin{code}" is the only word on the
// line except whitespace, switch to LITERATE_BLOCK
// otherwise color the line as a literate comment.
,LITERATE_BLOCK = 1 // if the string "\end{code}" is encountered at column
// 0 ignoring all later characters, color the line
// as a codedelim and switch to LITERATE_BIRD
// otherwise color the line as code.
};
struct HaskellLineInfo {
unsigned int nestLevel; // 22 bits ought to be enough for anybody
unsigned int nonexternalStyle; // 5 bits, widen if number of styles goes
// beyond 31.
bool pragma;
LiterateMode lmode;
KeywordMode mode;
HaskellLineInfo(int state) :
nestLevel (state >> 10)
, nonexternalStyle ((state >> 5) & 0x1F)
, pragma ((state >> 4) & 0x1)
, lmode (static_cast<LiterateMode>((state >> 3) & 0x1))
, mode (static_cast<KeywordMode>(state & 0x7))
{}
int ToLineState() {
return
(nestLevel << 10)
| (nonexternalStyle << 5)
| (pragma << 4)
| (lmode << 3)
| mode;
}
};
inline void skipMagicHash(StyleContext &sc, const HashCount hashes) const {
if (options.magicHash && sc.ch == '#') {
sc.Forward();
if (hashes == twoHashes && sc.ch == '#') {
sc.Forward();
} else if (hashes == unlimitedHashes) {
while (sc.ch == '#') {
sc.Forward();
}
}
}
}
bool LineContainsImport(const Sci_Position line, Accessor &styler) const {
if (options.foldImports) {
Sci_Position currentPos = styler.LineStart(line);
int style = styler.StyleAt(currentPos);
Sci_Position eol_pos = styler.LineStart(line + 1) - 1;
while (currentPos < eol_pos) {
int ch = styler[currentPos];
style = styler.StyleAt(currentPos);
if (ch == ' ' || ch == '\t'
|| IsCommentBlockStyle(style)
|| style == SCE_HA_LITERATE_CODEDELIM) {
currentPos++;
} else {
break;
}
}
return (style == SCE_HA_KEYWORD
&& styler.Match(currentPos, "import"));
} else {
return false;
}
}
inline int IndentAmountWithOffset(Accessor &styler, const Sci_Position line) const {
const int indent = HaskellIndentAmount(styler, line);
const int indentLevel = indent & SC_FOLDLEVELNUMBERMASK;
return indentLevel <= ((firstImportIndent - 1) + SC_FOLDLEVELBASE)
? indent
: (indentLevel + firstImportIndent) | (indent & ~SC_FOLDLEVELNUMBERMASK);
}
inline int IndentLevelRemoveIndentOffset(const int indentLevel) const {
return indentLevel <= ((firstImportIndent - 1) + SC_FOLDLEVELBASE)
? indentLevel
: indentLevel - firstImportIndent;
}
public:
LexerHaskell(bool literate_)
: literate(literate_)
, firstImportLine(-1)
, firstImportIndent(0)
{}
virtual ~LexerHaskell() {}
void SCI_METHOD Release() {
delete this;
}
int SCI_METHOD Version() const {
return lvOriginal;
}
const char * SCI_METHOD PropertyNames() {
return osHaskell.PropertyNames();
}
int SCI_METHOD PropertyType(const char *name) {
return osHaskell.PropertyType(name);
}
const char * SCI_METHOD DescribeProperty(const char *name) {
return osHaskell.DescribeProperty(name);
}
Sci_Position SCI_METHOD PropertySet(const char *key, const char *val);
const char * SCI_METHOD DescribeWordListSets() {
return osHaskell.DescribeWordListSets();
}
Sci_Position SCI_METHOD WordListSet(int n, const char *wl);
void SCI_METHOD Lex(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess);
void SCI_METHOD Fold(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess);
void * SCI_METHOD PrivateCall(int, void *) {
return 0;
}
static ILexer *LexerFactoryHaskell() {
return new LexerHaskell(false);
}
static ILexer *LexerFactoryLiterateHaskell() {
return new LexerHaskell(true);
}
};
Sci_Position SCI_METHOD LexerHaskell::PropertySet(const char *key, const char *val) {
if (osHaskell.PropertySet(&options, key, val)) {
return 0;
}
return -1;
}
Sci_Position SCI_METHOD LexerHaskell::WordListSet(int n, const char *wl) {
WordList *wordListN = 0;
switch (n) {
case 0:
wordListN = &keywords;
break;
case 1:
wordListN = &ffi;
break;
case 2:
wordListN = &reserved_operators;
break;
}
Sci_Position firstModification = -1;
if (wordListN) {
WordList wlNew;
wlNew.Set(wl);
if (*wordListN != wlNew) {
wordListN->Set(wl);
firstModification = 0;
}
}
return firstModification;
}
void SCI_METHOD LexerHaskell::Lex(Sci_PositionU startPos, Sci_Position length, int initStyle
,IDocument *pAccess) {
LexAccessor styler(pAccess);
Sci_Position lineCurrent = styler.GetLine(startPos);
HaskellLineInfo hs = HaskellLineInfo(lineCurrent ? styler.GetLineState(lineCurrent-1) : 0);
// Do not leak onto next line
if (initStyle == SCE_HA_STRINGEOL)
initStyle = SCE_HA_DEFAULT;
else if (initStyle == SCE_HA_LITERATE_CODEDELIM)
initStyle = hs.nonexternalStyle;
StyleContext sc(startPos, length, initStyle, styler);
int base = 10;
bool dot = false;
bool inDashes = false;
bool alreadyInTheMiddleOfOperator = false;
assert(!(IsCommentBlockStyle(initStyle) && hs.nestLevel == 0));
while (sc.More()) {
// Check for state end
if (!IsExternalStyle(sc.state)) {
hs.nonexternalStyle = sc.state;
}
// For lexer to work, states should unconditionally forward at least one
// character.
// If they don't, they should still check if they are at line end and
// forward if so.
// If a state forwards more than one character, it should check every time
// that it is not a line end and cease forwarding otherwise.
if (sc.atLineEnd) {
// Remember the line state for future incremental lexing
styler.SetLineState(lineCurrent, hs.ToLineState());
lineCurrent++;
}
// Handle line continuation generically.
if (sc.ch == '\\' && (sc.chNext == '\n' || sc.chNext == '\r')
&& ( sc.state == SCE_HA_STRING
|| sc.state == SCE_HA_PREPROCESSOR)) {
// Remember the line state for future incremental lexing
styler.SetLineState(lineCurrent, hs.ToLineState());
lineCurrent++;
sc.Forward();
if (sc.ch == '\r' && sc.chNext == '\n') {
sc.Forward();
}
sc.Forward();
continue;
}
if (sc.atLineStart) {
if (sc.state == SCE_HA_STRING || sc.state == SCE_HA_CHARACTER) {
// Prevent SCE_HA_STRINGEOL from leaking back to previous line
sc.SetState(sc.state);
}
if (literate && hs.lmode == LITERATE_BIRD) {
if (!IsExternalStyle(sc.state)) {
sc.SetState(SCE_HA_LITERATE_COMMENT);
}
}
}
// External
// Literate
if ( literate && hs.lmode == LITERATE_BIRD && sc.atLineStart
&& sc.ch == '>') {
sc.SetState(SCE_HA_LITERATE_CODEDELIM);
sc.ForwardSetState(hs.nonexternalStyle);
}
else if (literate && hs.lmode == LITERATE_BIRD && sc.atLineStart
&& ( sc.ch == ' ' || sc.ch == '\t'
|| sc.Match("\\begin{code}"))) {
sc.SetState(sc.state);
while ((sc.ch == ' ' || sc.ch == '\t') && sc.More())
sc.Forward();
if (sc.Match("\\begin{code}")) {
sc.Forward(static_cast<int>(strlen("\\begin{code}")));
bool correct = true;
while (!sc.atLineEnd && sc.More()) {
if (sc.ch != ' ' && sc.ch != '\t') {
correct = false;
}
sc.Forward();
}
if (correct) {
sc.ChangeState(SCE_HA_LITERATE_CODEDELIM); // color the line end
hs.lmode = LITERATE_BLOCK;
}
}
}
else if (literate && hs.lmode == LITERATE_BLOCK && sc.atLineStart
&& sc.Match("\\end{code}")) {
sc.SetState(SCE_HA_LITERATE_CODEDELIM);
sc.Forward(static_cast<int>(strlen("\\end{code}")));
while (!sc.atLineEnd && sc.More()) {
sc.Forward();
}
sc.SetState(SCE_HA_LITERATE_COMMENT);
hs.lmode = LITERATE_BIRD;
}
// Preprocessor
else if (sc.atLineStart && sc.ch == '#' && options.cpp
&& (!options.stylingWithinPreprocessor || sc.state == SCE_HA_DEFAULT)) {
sc.SetState(SCE_HA_PREPROCESSOR);
sc.Forward();
}
// Literate
else if (sc.state == SCE_HA_LITERATE_COMMENT) {
sc.Forward();
}
else if (sc.state == SCE_HA_LITERATE_CODEDELIM) {
sc.ForwardSetState(hs.nonexternalStyle);
}
// Preprocessor
else if (sc.state == SCE_HA_PREPROCESSOR) {
if (sc.atLineEnd) {
sc.SetState(options.stylingWithinPreprocessor
? SCE_HA_DEFAULT
: hs.nonexternalStyle);
sc.Forward(); // prevent double counting a line
} else if (options.stylingWithinPreprocessor && !IsHaskellLetter(sc.ch)) {
sc.SetState(SCE_HA_DEFAULT);
} else {
sc.Forward();
}
}
// Haskell
// Operator
else if (sc.state == SCE_HA_OPERATOR) {
int style = SCE_HA_OPERATOR;
if ( sc.ch == ':'
&& !alreadyInTheMiddleOfOperator
// except "::"
&& !( sc.chNext == ':'
&& !IsAnHaskellOperatorChar(sc.GetRelative(2)))) {
style = SCE_HA_CAPITAL;
}
alreadyInTheMiddleOfOperator = false;
while (IsAnHaskellOperatorChar(sc.ch))
sc.Forward();
char s[100];
sc.GetCurrent(s, sizeof(s));
if (reserved_operators.InList(s))
style = SCE_HA_RESERVED_OPERATOR;
sc.ChangeState(style);
sc.SetState(SCE_HA_DEFAULT);
}
// String
else if (sc.state == SCE_HA_STRING) {
if (sc.atLineEnd) {
sc.ChangeState(SCE_HA_STRINGEOL);
sc.ForwardSetState(SCE_HA_DEFAULT);
} else if (sc.ch == '\"') {
sc.Forward();
skipMagicHash(sc, oneHash);
sc.SetState(SCE_HA_DEFAULT);
} else if (sc.ch == '\\') {
sc.Forward(2);
} else {
sc.Forward();
}
}
// Char
else if (sc.state == SCE_HA_CHARACTER) {
if (sc.atLineEnd) {
sc.ChangeState(SCE_HA_STRINGEOL);
sc.ForwardSetState(SCE_HA_DEFAULT);
} else if (sc.ch == '\'') {
sc.Forward();
skipMagicHash(sc, oneHash);
sc.SetState(SCE_HA_DEFAULT);
} else if (sc.ch == '\\') {
sc.Forward(2);
} else {
sc.Forward();
}
}
// Number
else if (sc.state == SCE_HA_NUMBER) {
if (sc.atLineEnd) {
sc.SetState(SCE_HA_DEFAULT);
sc.Forward(); // prevent double counting a line
} else if (IsADigit(sc.ch, base)) {
sc.Forward();
} else if (sc.ch=='.' && dot && IsADigit(sc.chNext, base)) {
sc.Forward(2);
dot = false;
} else if ((base == 10) &&
(sc.ch == 'e' || sc.ch == 'E') &&
(IsADigit(sc.chNext) || sc.chNext == '+' || sc.chNext == '-')) {
sc.Forward();
if (sc.ch == '+' || sc.ch == '-')
sc.Forward();
} else {
skipMagicHash(sc, twoHashes);
sc.SetState(SCE_HA_DEFAULT);
}
}
// Keyword or Identifier
else if (sc.state == SCE_HA_IDENTIFIER) {
int style = IsHaskellUpperCase(sc.ch) ? SCE_HA_CAPITAL : SCE_HA_IDENTIFIER;
assert(IsAHaskellWordStart(sc.ch));
sc.Forward();
while (sc.More()) {
if (IsAHaskellWordChar(sc.ch)) {
sc.Forward();
} else if (sc.ch == '.' && style == SCE_HA_CAPITAL) {
if (IsHaskellUpperCase(sc.chNext)) {
sc.Forward();
style = SCE_HA_CAPITAL;
} else if (IsAHaskellWordStart(sc.chNext)) {
sc.Forward();
style = SCE_HA_IDENTIFIER;
} else if (IsAnHaskellOperatorChar(sc.chNext)) {
sc.Forward();
style = sc.ch == ':' ? SCE_HA_CAPITAL : SCE_HA_OPERATOR;
while (IsAnHaskellOperatorChar(sc.ch))
sc.Forward();
break;
} else {
break;
}
} else {
break;
}
}
skipMagicHash(sc, unlimitedHashes);
char s[100];
sc.GetCurrent(s, sizeof(s));
KeywordMode new_mode = HA_MODE_DEFAULT;
if (keywords.InList(s)) {
style = SCE_HA_KEYWORD;
} else if (style == SCE_HA_CAPITAL) {
if (hs.mode == HA_MODE_IMPORT1 || hs.mode == HA_MODE_IMPORT3) {
style = SCE_HA_MODULE;
new_mode = HA_MODE_IMPORT2;
} else if (hs.mode == HA_MODE_MODULE) {
style = SCE_HA_MODULE;
}
} else if (hs.mode == HA_MODE_IMPORT1 &&
strcmp(s,"qualified") == 0) {
style = SCE_HA_KEYWORD;
new_mode = HA_MODE_IMPORT1;
} else if (options.highlightSafe &&
hs.mode == HA_MODE_IMPORT1 &&
strcmp(s,"safe") == 0) {
style = SCE_HA_KEYWORD;
new_mode = HA_MODE_IMPORT1;
} else if (hs.mode == HA_MODE_IMPORT2) {
if (strcmp(s,"as") == 0) {
style = SCE_HA_KEYWORD;
new_mode = HA_MODE_IMPORT3;
} else if (strcmp(s,"hiding") == 0) {
style = SCE_HA_KEYWORD;
}
} else if (hs.mode == HA_MODE_TYPE) {
if (strcmp(s,"family") == 0)
style = SCE_HA_KEYWORD;
}
if (hs.mode == HA_MODE_FFI) {
if (ffi.InList(s)) {
style = SCE_HA_KEYWORD;
new_mode = HA_MODE_FFI;
}
}
sc.ChangeState(style);
sc.SetState(SCE_HA_DEFAULT);
if (strcmp(s,"import") == 0 && hs.mode != HA_MODE_FFI)
new_mode = HA_MODE_IMPORT1;
else if (strcmp(s,"module") == 0)
new_mode = HA_MODE_MODULE;
else if (strcmp(s,"foreign") == 0)
new_mode = HA_MODE_FFI;
else if (strcmp(s,"type") == 0
|| strcmp(s,"data") == 0)
new_mode = HA_MODE_TYPE;
hs.mode = new_mode;
}
// Comments
// Oneliner
else if (sc.state == SCE_HA_COMMENTLINE) {
if (sc.atLineEnd) {
sc.SetState(hs.pragma ? SCE_HA_PRAGMA : SCE_HA_DEFAULT);
sc.Forward(); // prevent double counting a line
} else if (inDashes && sc.ch != '-' && !hs.pragma) {
inDashes = false;
if (IsAnHaskellOperatorChar(sc.ch)) {
alreadyInTheMiddleOfOperator = true;
sc.ChangeState(SCE_HA_OPERATOR);
}
} else {
sc.Forward();
}
}
// Nested
else if (IsCommentBlockStyle(sc.state)) {
if (sc.Match('{','-')) {
sc.SetState(CommentBlockStyleFromNestLevel(hs.nestLevel));
sc.Forward(2);
hs.nestLevel++;
} else if (sc.Match('-','}')) {
sc.Forward(2);
assert(hs.nestLevel > 0);
if (hs.nestLevel > 0)
hs.nestLevel--;
sc.SetState(
hs.nestLevel == 0
? (hs.pragma ? SCE_HA_PRAGMA : SCE_HA_DEFAULT)
: CommentBlockStyleFromNestLevel(hs.nestLevel - 1));
} else {
sc.Forward();
}
}
// Pragma
else if (sc.state == SCE_HA_PRAGMA) {
if (sc.Match("#-}")) {
hs.pragma = false;
sc.Forward(3);
sc.SetState(SCE_HA_DEFAULT);
} else if (sc.Match('-','-')) {
sc.SetState(SCE_HA_COMMENTLINE);
sc.Forward(2);
inDashes = false;
} else if (sc.Match('{','-')) {
sc.SetState(CommentBlockStyleFromNestLevel(hs.nestLevel));
sc.Forward(2);
hs.nestLevel = 1;
} else {
sc.Forward();
}
}
// New state?
else if (sc.state == SCE_HA_DEFAULT) {
// Digit
if (IsADigit(sc.ch)) {
hs.mode = HA_MODE_DEFAULT;
sc.SetState(SCE_HA_NUMBER);
if (sc.ch == '0' && (sc.chNext == 'X' || sc.chNext == 'x')) {
// Match anything starting with "0x" or "0X", too
sc.Forward(2);
base = 16;
dot = false;
} else if (sc.ch == '0' && (sc.chNext == 'O' || sc.chNext == 'o')) {
// Match anything starting with "0o" or "0O", too
sc.Forward(2);
base = 8;
dot = false;
} else {
sc.Forward();
base = 10;
dot = true;
}
}
// Pragma
else if (sc.Match("{-#")) {
hs.pragma = true;
sc.SetState(SCE_HA_PRAGMA);
sc.Forward(3);
}
// Comment line
else if (sc.Match('-','-')) {
sc.SetState(SCE_HA_COMMENTLINE);
sc.Forward(2);
inDashes = true;
}
// Comment block
else if (sc.Match('{','-')) {
sc.SetState(CommentBlockStyleFromNestLevel(hs.nestLevel));
sc.Forward(2);
hs.nestLevel = 1;
}
// String
else if (sc.ch == '\"') {
sc.SetState(SCE_HA_STRING);
sc.Forward();
}
// Character or quoted name or promoted term
else if (sc.ch == '\'') {
hs.mode = HA_MODE_DEFAULT;
sc.SetState(SCE_HA_CHARACTER);
sc.Forward();
if (options.allowQuotes) {
// Quoted type ''T
if (sc.ch=='\'' && IsAHaskellWordStart(sc.chNext)) {
sc.Forward();
sc.ChangeState(SCE_HA_IDENTIFIER);
} else if (sc.chNext != '\'') {
// Quoted name 'n or promoted constructor 'N
if (IsAHaskellWordStart(sc.ch)) {
sc.ChangeState(SCE_HA_IDENTIFIER);
// Promoted constructor operator ':~>
} else if (sc.ch == ':') {
alreadyInTheMiddleOfOperator = false;
sc.ChangeState(SCE_HA_OPERATOR);
// Promoted list or tuple '[T]
} else if (sc.ch == '[' || sc.ch== '(') {
sc.ChangeState(SCE_HA_OPERATOR);
sc.ForwardSetState(SCE_HA_DEFAULT);
}
}
}
}
// Operator starting with '?' or an implicit parameter
else if (sc.ch == '?') {
hs.mode = HA_MODE_DEFAULT;
alreadyInTheMiddleOfOperator = false;
sc.SetState(SCE_HA_OPERATOR);
if ( options.implicitParams
&& IsAHaskellWordStart(sc.chNext)
&& !IsHaskellUpperCase(sc.chNext)) {
sc.Forward();
sc.ChangeState(SCE_HA_IDENTIFIER);
}
}
// Operator
else if (IsAnHaskellOperatorChar(sc.ch)) {
hs.mode = HA_MODE_DEFAULT;
sc.SetState(SCE_HA_OPERATOR);
}
// Braces and punctuation
else if (sc.ch == ',' || sc.ch == ';'
|| sc.ch == '(' || sc.ch == ')'
|| sc.ch == '[' || sc.ch == ']'
|| sc.ch == '{' || sc.ch == '}') {
sc.SetState(SCE_HA_OPERATOR);
sc.ForwardSetState(SCE_HA_DEFAULT);
}
// Keyword or Identifier
else if (IsAHaskellWordStart(sc.ch)) {
sc.SetState(SCE_HA_IDENTIFIER);
// Something we don't care about
} else {
sc.Forward();
}
}
// This branch should never be reached.
else {
assert(false);
sc.Forward();
}
}
sc.Complete();
}
void SCI_METHOD LexerHaskell::Fold(Sci_PositionU startPos, Sci_Position length, int // initStyle
,IDocument *pAccess) {
if (!options.fold)
return;
Accessor styler(pAccess, NULL);
Sci_Position lineCurrent = styler.GetLine(startPos);
if (lineCurrent <= firstImportLine) {
firstImportLine = -1; // readjust first import position
firstImportIndent = 0;
}
const Sci_Position maxPos = startPos + length;
const Sci_Position maxLines =
maxPos == styler.Length()
? styler.GetLine(maxPos)
: styler.GetLine(maxPos - 1); // Requested last line
const Sci_Position docLines = styler.GetLine(styler.Length()); // Available last line
// Backtrack to previous non-blank line so we can determine indent level
// for any white space lines
// and so we can fix any preceding fold level (which is why we go back
// at least one line in all cases)
bool importHere = LineContainsImport(lineCurrent, styler);
int indentCurrent = IndentAmountWithOffset(styler, lineCurrent);
while (lineCurrent > 0) {
lineCurrent--;
importHere = LineContainsImport(lineCurrent, styler);
indentCurrent = IndentAmountWithOffset(styler, lineCurrent);
if (!(indentCurrent & SC_FOLDLEVELWHITEFLAG))
break;
}
int indentCurrentLevel = indentCurrent & SC_FOLDLEVELNUMBERMASK;
if (importHere) {
indentCurrentLevel = IndentLevelRemoveIndentOffset(indentCurrentLevel);
if (firstImportLine == -1) {
firstImportLine = lineCurrent;
firstImportIndent = (1 + indentCurrentLevel) - SC_FOLDLEVELBASE;
}
if (firstImportLine != lineCurrent) {
indentCurrentLevel++;
}
}
indentCurrent = indentCurrentLevel | (indentCurrent & ~SC_FOLDLEVELNUMBERMASK);
// Process all characters to end of requested range
//that hangs over the end of the range. Cap processing in all cases
// to end of document.
while (lineCurrent <= docLines && lineCurrent <= maxLines) {
// Gather info
Sci_Position lineNext = lineCurrent + 1;
importHere = false;
int indentNext = indentCurrent;
if (lineNext <= docLines) {
// Information about next line is only available if not at end of document
importHere = LineContainsImport(lineNext, styler);
indentNext = IndentAmountWithOffset(styler, lineNext);
}
if (indentNext & SC_FOLDLEVELWHITEFLAG)
indentNext = SC_FOLDLEVELWHITEFLAG | indentCurrentLevel;
// Skip past any blank lines for next indent level info; we skip also
// comments (all comments, not just those starting in column 0)
// which effectively folds them into surrounding code rather
// than screwing up folding.
while (lineNext < docLines && (indentNext & SC_FOLDLEVELWHITEFLAG)) {
lineNext++;
importHere = LineContainsImport(lineNext, styler);
indentNext = IndentAmountWithOffset(styler, lineNext);
}
int indentNextLevel = indentNext & SC_FOLDLEVELNUMBERMASK;
if (importHere) {
indentNextLevel = IndentLevelRemoveIndentOffset(indentNextLevel);
if (firstImportLine == -1) {
firstImportLine = lineNext;
firstImportIndent = (1 + indentNextLevel) - SC_FOLDLEVELBASE;
}
if (firstImportLine != lineNext) {
indentNextLevel++;
}
}
indentNext = indentNextLevel | (indentNext & ~SC_FOLDLEVELNUMBERMASK);
const int levelBeforeComments = Maximum(indentCurrentLevel,indentNextLevel);
// Now set all the indent levels on the lines we skipped
// Do this from end to start. Once we encounter one line
// which is indented more than the line after the end of
// the comment-block, use the level of the block before
Sci_Position skipLine = lineNext;
int skipLevel = indentNextLevel;
while (--skipLine > lineCurrent) {
int skipLineIndent = IndentAmountWithOffset(styler, skipLine);
if (options.foldCompact) {
if ((skipLineIndent & SC_FOLDLEVELNUMBERMASK) > indentNextLevel) {
skipLevel = levelBeforeComments;
}
int whiteFlag = skipLineIndent & SC_FOLDLEVELWHITEFLAG;
styler.SetLevel(skipLine, skipLevel | whiteFlag);
} else {
if ( (skipLineIndent & SC_FOLDLEVELNUMBERMASK) > indentNextLevel
&& !(skipLineIndent & SC_FOLDLEVELWHITEFLAG)) {
skipLevel = levelBeforeComments;
}
styler.SetLevel(skipLine, skipLevel);
}
}
int lev = indentCurrent;
if (!(indentCurrent & SC_FOLDLEVELWHITEFLAG)) {
if ((indentCurrent & SC_FOLDLEVELNUMBERMASK) < (indentNext & SC_FOLDLEVELNUMBERMASK))
lev |= SC_FOLDLEVELHEADERFLAG;
}
// Set fold level for this line and move to next line
styler.SetLevel(lineCurrent, options.foldCompact ? lev : lev & ~SC_FOLDLEVELWHITEFLAG);
indentCurrent = indentNext;
indentCurrentLevel = indentNextLevel;
lineCurrent = lineNext;
}
// NOTE: Cannot set level of last line here because indentCurrent doesn't have
// header flag set; the loop above is crafted to take care of this case!
//styler.SetLevel(lineCurrent, indentCurrent);
}
LexerModule lmHaskell(SCLEX_HASKELL, LexerHaskell::LexerFactoryHaskell, "haskell", haskellWordListDesc);
LexerModule lmLiterateHaskell(SCLEX_LITERATEHASKELL, LexerHaskell::LexerFactoryLiterateHaskell, "literatehaskell", haskellWordListDesc);