Scripts to convert old lang files
parent
0971cb441f
commit
e0c326682c
|
@ -0,0 +1,497 @@
|
|||
#!/usr/bin/env python
|
||||
|
||||
import cgi
import xml.dom.minidom as dom
import xml.sax.saxutils as saxutils
|
||||
|
||||
# Maps a style name found in an old-format lang file to the id of the
# predefined "def:" style it should be mapped to.
default_styles = {
    'Comment': 'def:comment',
    'String': 'def:string',
    'Preprocessor': 'def:preprocessor',
    'Keyword': 'def:keyword',
    'Data Type': 'def:data-type',
    'Decimal': 'def:decimal',
    'Specials': 'def:specials',
    'Function': 'def:function',
    'Base-N Integer': 'def:base-n-integer',
    # Both capitalizations of "Floating point" are listed.
    'Floating Point': 'def:floating-point',
    'Floating point': 'def:floating-point',
}
# Known style names that have no "def:" counterpart are mapped to None.
default_styles.update(
    dict.fromkeys(['Others', 'Other', 'Others 2', 'Others 3'])
)
|
||||
|
||||
def escape_escape_char(ch):
    """Return the text to use in a regex for the escape character *ch*.

    A backslash is doubled and '@' is returned unchanged.

    Raises:
        RuntimeError: for any other character.
    """
    if ch != '\\' and ch != '@':
        raise RuntimeError("don't know how to escape '%s'" % (ch,))
    return '\\\\' if ch == '\\' else ch
|
||||
|
||||
def normalize_id(id):
    """Turn a language or context name into a lowercase, dash-separated id.

    A handful of names have a fixed translation; every other name has the
    separators below replaced by '-' and is then lowercased.
    """
    fixed_ids = {
        "C#": "c-sharp",
        ".desktop": "desktop",
        ".ini": "ini",
        "C++ Line Comment": "cpp-line-comment",
        "Markup (inline)": "markup-inline",
        "Markup (block)": "markup-block",
    }
    if id in fixed_ids:
        return fixed_ids[id]

    normalized = id
    # Order matters: ', ' must collapse to a single dash before ',' and ' '
    # are handled on their own.
    for separator in (', ', '.', '*', ',', ' ', '/', '#'):
        normalized = normalized.replace(separator, '-')
    return normalized.lower()
|
||||
|
||||
class LangFile(object):
    """One language definition that can render itself as a version 2.0
    language file.

    Holds the attributes of the source <language> element, the list of
    contexts added with add_context() and an optional escape character.
    """

    def __init__(self, id, name, _name, section, _section, mimetypes, filename):
        """Store the language attributes.

        At least one of name/_name and one of section/_section must be
        non-empty.  The id is derived from the first non-empty value among
        id, name and _name.
        """
        object.__init__(self)

        assert name or _name
        assert section or _section

        self.id = normalize_id(id or name or _name)
        self.name = name
        self._name = _name
        self.section = section
        self._section = _section
        self.mimetypes = mimetypes
        self.filename = filename
        self.contexts = []
        self.escape_char = None

    def set_esc_char(self, char):
        """Record the escape character of the language."""
        self.escape_char = char

    def add_context(self, ctx):
        """Append a context; contexts are emitted in insertion order."""
        self.contexts.append(ctx)

    def format_header(self, indent):
        """Return the XML declaration and the opening <language> tag.

        Attribute values are escaped with quoteattr so that characters
        such as '&' or '"' in a name cannot produce malformed XML.
        """
        quote = saxutils.quoteattr
        parts = ['<?xml version="1.0" encoding="UTF-8"?>\n<language id=%s'
                 % (quote(self.id),)]

        # The translatable (underscore-prefixed) variant is used only when
        # the plain attribute is empty.
        if self.name:
            parts.append(' name=%s' % (quote(self.name),))
        else:
            parts.append(' _name=%s' % (quote(self._name),))

        parts.append(' version="2.0"')

        if self.section:
            parts.append(' section=%s' % (quote(self.section),))
        else:
            parts.append(' _section=%s' % (quote(self._section),))

        if self.mimetypes:
            parts.append(' mimetypes=%s' % (quote(self.mimetypes),))

        parts.append('>\n')
        return ''.join(parts)

    def format_footer(self, indent):
        """Return the closing </language> tag."""
        return '</language>\n'

    def format_styles(self, indent):
        """Return the <styles> section, one <style> per distinct style id.

        Styles appear in the order their id is first seen; when several
        contexts share a style id the last one supplies name and map-to.
        Style names missing from default_styles get no map-to attribute
        instead of raising KeyError.
        """
        quote = saxutils.quoteattr
        order = []
        styles = {}
        for ctx in self.contexts:
            if ctx.style not in styles:
                order.append(ctx.style)
            styles[ctx.style] = (ctx.style_name,
                                 default_styles.get(ctx.style_name))

        parts = [indent + "<styles>\n"]
        for style_id in order:
            name, map_to = styles[style_id]
            if map_to:
                parts.append(indent*2 + '<style id=%s name=%s map-to=%s/>\n'
                             % (quote(style_id), quote(name), quote(map_to)))
            else:
                parts.append(indent*2 + '<style id=%s name=%s/>\n'
                             % (quote(style_id), quote(name)))
        parts.append(indent + "</styles>\n")
        return ''.join(parts)

    def format_contexts(self, indent):
        """Return the <definitions> section.

        It contains, in order: the two generated escape contexts (only
        when an escape character is set), every added context, and a main
        context named after the language that includes all of them.
        """
        quote = saxutils.quoteattr
        parts = [indent + '<definitions>\n']

        if self.escape_char:
            char = escape_escape_char(self.escape_char)

            # Escape character followed by any single character.
            parts.append(indent*2 + '<context id="generated-escape">\n')
            parts.append(indent*3 + '<match>%s.</match>\n' % (char,))
            parts.append(indent*2 + '</context>\n')

            # Escape character at the end of a line.
            parts.append(indent*2 + '<context id="generated-line-escape">\n')
            parts.append(indent*3 + '<start>%s$</start>\n' % (char,))
            parts.append(indent*3 + '<end>^</end>\n')
            parts.append(indent*2 + '</context>\n')

        for ctx in self.contexts:
            parts.append(ctx.format(indent))

        parts.append(indent*2 + '<context id=%s>\n' % (quote(self.id),))
        parts.append(indent*3 + '<include>\n')
        for ctx in self.contexts:
            parts.append(indent*4 + '<context ref=%s/>\n' % (quote(ctx.id),))
        parts.append(indent*3 + '</include>\n')
        parts.append(indent*2 + '</context>\n')

        parts.append(indent + '</definitions>\n')
        return ''.join(parts)

    def format(self, indent='  '):
        """Return the complete language file as a string."""
        return ''.join([
            self.format_header(indent),
            self.format_styles(indent),
            self.format_contexts(indent),
            self.format_footer(indent),
        ])
|
||||
|
||||
class Context(object):
    """Base class for one highlighting context of a language file.

    Subclasses override format(); is_container is set to True by the
    subclasses whose output should include the generated escape contexts.
    """

    def __init__(self, name, _name, style):
        """Store the names and derive the style id and context id.

        One of name/_name and the style must be non-empty.
        """
        object.__init__(self)
        assert (name or _name) and style
        self.name = name
        self._name = _name
        self.style_name = style
        self.style = style.replace(' ', '-').lower()
        self.id = normalize_id(name or _name)
        self.is_container = False

    def format(self, indent):
        """Fallback serialization: warn and emit an empty context.

        The warning goes to stderr because stdout carries the generated
        language file.
        """
        import sys
        sys.stderr.write("Implement me: %s.format()\n" % (type(self).__name__,))
        return indent*2 + '<context id=%s/>\n' % (saxutils.quoteattr(self.id),)

    def format_escape(self, indent):
        """Return an <include> of the generated escape contexts, or ''.

        Only container contexts get the include.
        """
        # NOTE(review): the references are emitted even when the language
        # defines no escape character, in which case LangFile does not
        # generate the referenced contexts - confirm this is intended.
        if not self.is_container:
            return ""
        return ''.join([
            indent*3 + '<include>\n',
            indent*4 + '<context ref="generated-escape"/>\n',
            indent*4 + '<context ref="generated-line-escape"/>\n',
            indent*3 + '</include>\n',
        ])
|
||||
|
||||
class KeywordList(Context):
    """Context that lists keywords with an optional prefix and suffix."""

    def __init__(self, name, _name, style, keywords, case_sensitive,
                 match_empty_string_at_beginning,
                 match_empty_string_at_end,
                 beginning_regex, end_regex):
        """Store the keyword list and its matching options."""
        Context.__init__(self, name, _name, style)
        self.keywords = keywords
        # NOTE(review): stored but not used by format(); the original
        # author marked this attribute with "???".
        self.case_sensitive = case_sensitive
        self.match_empty_string_at_beginning = match_empty_string_at_beginning
        self.match_empty_string_at_end = match_empty_string_at_end
        self.beginning_regex = beginning_regex
        self.end_regex = end_regex

    def format(self, indent):
        """Return the <context> element with prefix, suffix and keywords.

        An explicit beginning/end regex wins over the corresponding
        match-empty-string flag, which is rendered as a '\\b' word boundary.
        """
        escape = saxutils.escape
        parts = [indent*2 + '<context id=%s style-ref=%s>\n'
                 % (saxutils.quoteattr(self.id),
                    saxutils.quoteattr(self.style))]

        if self.beginning_regex:
            parts.append(indent*3 + '<prefix>%s</prefix>\n'
                         % (escape(self.beginning_regex),))
        elif self.match_empty_string_at_beginning:
            parts.append(indent*3 + '<prefix>\\b</prefix>\n')

        if self.end_regex:
            parts.append(indent*3 + '<suffix>%s</suffix>\n'
                         % (escape(self.end_regex),))
        elif self.match_empty_string_at_end:
            parts.append(indent*3 + '<suffix>\\b</suffix>\n')

        for kw in self.keywords:
            parts.append(indent*3 + '<keyword>%s</keyword>\n' % (escape(kw),))

        parts.append(self.format_escape(indent))
        parts.append(indent*2 + '</context>\n')
        return ''.join(parts)
|
||||
|
||||
class PatternItem(Context):
    """Context defined by a single regular expression."""

    def __init__(self, name, _name, style, pattern):
        """Store the (non-empty) pattern."""
        Context.__init__(self, name, _name, style)
        assert pattern
        self.pattern = pattern

    def format(self, indent):
        """Return the <context> element wrapping one <match>."""
        return ''.join([
            indent*2 + '<context id=%s style-ref=%s>\n'
            % (saxutils.quoteattr(self.id), saxutils.quoteattr(self.style)),
            indent*3 + '<match>%s</match>\n' % (saxutils.escape(self.pattern),),
            self.format_escape(indent),
            indent*2 + '</context>\n',
        ])
|
||||
|
||||
class LineComment(Context):
    """Container context that starts at a regex and ends at the line end."""

    def __init__(self, name, _name, style, start):
        """Store the (non-empty) start regex and mark the context as a
        container."""
        Context.__init__(self, name, _name, style)
        assert start
        self.start = start
        self.is_container = True

    def format(self, indent):
        """Return the <context end-at-line-end="true"> element."""
        return ''.join([
            indent*2 + '<context id=%s style-ref=%s end-at-line-end="true">\n'
            % (saxutils.quoteattr(self.id), saxutils.quoteattr(self.style)),
            indent*3 + '<start>%s</start>\n' % (saxutils.escape(self.start),),
            self.format_escape(indent),
            indent*2 + '</context>\n',
        ])
|
||||
|
||||
class BlockComment(Context):
    """Container context delimited by a start regex and an end regex."""

    def __init__(self, name, _name, style, start, end):
        """Store the (non-empty) delimiters and mark the context as a
        container."""
        Context.__init__(self, name, _name, style)
        assert start and end
        self.start = start
        self.end = end
        self.is_container = True

    def format(self, indent):
        """Return the <context> element with <start> and <end>."""
        escape = saxutils.escape
        return ''.join([
            indent*2 + '<context id=%s style-ref=%s>\n'
            % (saxutils.quoteattr(self.id), saxutils.quoteattr(self.style)),
            indent*3 + '<start>%s</start>\n' % (escape(self.start),),
            indent*3 + '<end>%s</end>\n' % (escape(self.end),),
            self.format_escape(indent),
            indent*2 + '</context>\n',
        ])
|
||||
|
||||
class String(Context):
    """Container context for string literals."""

    def __init__(self, name, _name, style, start, end, end_at_line_end):
        """Store the delimiters.

        An end regex that finishes with the two characters backslash + 'n'
        has them stripped and forces end_at_line_end to True.
        """
        Context.__init__(self, name, _name, style)
        assert start and end
        self.start = start
        if end and end.endswith("\\n"):
            end = end[:-2]
            end_at_line_end = True
        self.end = end
        self.end_at_line_end = end_at_line_end
        self.is_container = True

    def format(self, indent):
        """Return the <context> element; <start>/<end> are omitted when
        the corresponding regex is empty."""
        escape = saxutils.escape
        parts = [indent*2 + '<context id=%s style-ref=%s'
                 % (saxutils.quoteattr(self.id),
                    saxutils.quoteattr(self.style))]
        if self.end_at_line_end:
            parts.append(' end-at-line-end="true"')
        parts.append('>\n')

        if self.start:
            parts.append(indent*3 + '<start>%s</start>\n' % (escape(self.start),))
        # The end regex may be empty after the trailing "\n" was stripped.
        if self.end:
            parts.append(indent*3 + '<end>%s</end>\n' % (escape(self.end),))

        parts.append(self.format_escape(indent))
        parts.append(indent*2 + '</context>\n')
        return ''.join(parts)
|
||||
|
||||
class SyntaxItem(Context):
    """Container context delimited by a start regex and an end regex."""

    def __init__(self, name, _name, style, start, end):
        """Store the delimiters.

        An end regex that finishes with the two characters backslash + 'n'
        has them stripped and turns on end_at_line_end.
        """
        Context.__init__(self, name, _name, style)
        assert start and end
        self.start = start
        self.end = end
        self.end_at_line_end = False
        if end and end.endswith("\\n"):
            self.end = end[:-2]
            self.end_at_line_end = True
        self.is_container = True

    def format(self, indent):
        """Return the <context> element; <start>/<end> are omitted when
        the corresponding regex is empty."""
        escape = saxutils.escape
        parts = [indent*2 + '<context id=%s style-ref=%s'
                 % (saxutils.quoteattr(self.id),
                    saxutils.quoteattr(self.style))]
        if self.end_at_line_end:
            parts.append(' end-at-line-end="true"')
        parts.append('>\n')

        if self.start:
            parts.append(indent*3 + '<start>%s</start>\n' % (escape(self.start),))
        # The end regex may be empty after the trailing "\n" was stripped.
        if self.end:
            parts.append(indent*3 + '<end>%s</end>\n' % (escape(self.end),))

        parts.append(self.format_escape(indent))
        parts.append(indent*2 + '</context>\n')
        return ''.join(parts)
|
||||
|
||||
def first_child(node):
    """Return the first child of *node* that is an element, or None.

    Non-element children (text, comments, ...) are skipped.
    """
    for candidate in node.childNodes:
        if candidate.nodeType == dom.Node.ELEMENT_NODE:
            return candidate
    return None
|
||||
def next_sibling(node):
    """Return the next sibling of *node* that is an element, or None.

    Non-element siblings (text, comments, ...) are skipped.
    """
    sibling = node.nextSibling
    while sibling is not None:
        if sibling.nodeType == dom.Node.ELEMENT_NODE:
            break
        sibling = sibling.nextSibling
    return sibling
|
||||
|
||||
def parseLineComment(cur, name, _name, style):
    """Build a LineComment from element *cur*.

    The first child element must be <start-regex>; its first child node
    supplies the start regex.
    """
    start_node = first_child(cur)
    assert start_node is not None and start_node.tagName == "start-regex"
    start_regex = start_node.firstChild.nodeValue
    return LineComment(name, _name, style, start_regex)
|
||||
|
||||
def parseBlockComment(cur, name, _name, style):
    """Build a BlockComment from element *cur*.

    Both a <start-regex> and an <end-regex> child are required; if a tag
    occurs more than once the last occurrence wins.
    """
    regexes = {"start-regex": None, "end-regex": None}

    element = first_child(cur)
    while element is not None:
        if element.tagName in regexes:
            regexes[element.tagName] = element.firstChild.nodeValue
        element = next_sibling(element)

    assert regexes["start-regex"] is not None
    assert regexes["end-regex"] is not None

    return BlockComment(name, _name, style,
                        regexes["start-regex"], regexes["end-regex"])
|
||||
|
||||
def parseString(cur, name, _name, style):
    """Build a String from element *cur*.

    The "end-at-line-end" attribute defaults to True when absent or empty;
    otherwise only the values "TRUE" and "1" count as true.  Both a
    <start-regex> and an <end-regex> child are required.
    """
    raw_flag = cur.getAttribute("end-at-line-end")
    if raw_flag:
        end_at_line_end = raw_flag in ["TRUE", "1"]
    else:
        end_at_line_end = True

    regexes = {"start-regex": None, "end-regex": None}
    element = first_child(cur)
    while element is not None:
        if element.tagName in regexes:
            regexes[element.tagName] = element.firstChild.nodeValue
        element = next_sibling(element)

    assert regexes["start-regex"] is not None
    assert regexes["end-regex"] is not None

    return String(name, _name, style,
                  regexes["start-regex"], regexes["end-regex"],
                  end_at_line_end)
|
||||
|
||||
def parseKeywordList(cur, name, _name, style):
    """Build a KeywordList from element *cur*.

    The three boolean attributes default to True when absent or empty;
    otherwise only "TRUE" and "1" count as true.  The two regex attributes
    default to None.  At least one <keyword> child is required.
    """
    def flag(attribute):
        # Read a boolean attribute of cur with a default of True.
        raw = cur.getAttribute(attribute)
        if not raw:
            return True
        return raw in ["TRUE", "1"]

    case_sensitive = flag("case-sensitive")
    at_beginning = flag("match-empty-string-at-beginning")
    at_end = flag("match-empty-string-at-end")

    beginning_regex = cur.getAttribute("beginning-regex") or None
    end_regex = cur.getAttribute("end-regex") or None

    keywords = []
    element = first_child(cur)
    while element is not None:
        if element.tagName == "keyword":
            keywords.append(element.firstChild.nodeValue)
        element = next_sibling(element)

    assert keywords

    return KeywordList(name, _name, style, keywords, case_sensitive,
                       at_beginning, at_end,
                       beginning_regex, end_regex)
|
||||
|
||||
def parsePatternItem(cur, name, _name, style):
    """Build a PatternItem from element *cur*.

    The first child element must be <regex>; its first child node supplies
    the pattern.
    """
    regex_node = first_child(cur)
    assert regex_node is not None and regex_node.tagName == "regex"
    pattern = regex_node.firstChild.nodeValue
    return PatternItem(name, _name, style, pattern)
|
||||
|
||||
def parseSyntaxItem(cur, name, _name, style):
    """Build a SyntaxItem from element *cur*.

    Both a <start-regex> and an <end-regex> child are required; if a tag
    occurs more than once the last occurrence wins.
    """
    regexes = {"start-regex": None, "end-regex": None}

    element = first_child(cur)
    while element is not None:
        if element.tagName in regexes:
            regexes[element.tagName] = element.firstChild.nodeValue
        element = next_sibling(element)

    assert regexes["start-regex"] is not None
    assert regexes["end-regex"] is not None

    return SyntaxItem(name, _name, style,
                      regexes["start-regex"], regexes["end-regex"])
|
||||
|
||||
def parseTag(cur):
    """Convert one child element of <language> into a context object.

    The element must carry a "name" or "_name" attribute; a missing
    "style" attribute defaults to "Normal".

    Returns:
        The context built by the parser matching the tag name, or None for
        an unknown tag (a warning is written to stderr, because stdout
        carries the generated language file).
    """
    _name = cur.getAttribute("_name")
    name = cur.getAttribute("name")
    assert name or _name
    style = cur.getAttribute("style") or "Normal"

    tag = cur.tagName
    if tag == "line-comment":
        return parseLineComment(cur, name, _name, style)
    if tag == "block-comment":
        return parseBlockComment(cur, name, _name, style)
    if tag == "string":
        return parseString(cur, name, _name, style)
    if tag == "keyword-list":
        return parseKeywordList(cur, name, _name, style)
    if tag == "pattern-item":
        return parsePatternItem(cur, name, _name, style)
    if tag == "syntax-item":
        return parseSyntaxItem(cur, name, _name, style)

    import sys
    sys.stderr.write("Unknown tag: %s\n" % (tag,))
    return None
|
||||
|
||||
def parse_file(filename):
    """Parse an old-format lang file and return it as a LangFile.

    The document element must be <language> and must have at least one
    child element.  An <escape-char> child sets the escape character;
    every other child is converted with parseTag().  Children that
    parseTag() does not recognize (it returns None for them) are skipped,
    since a None entry would make the later formatting step fail.
    """
    doc = dom.parse(filename)
    root = doc.documentElement

    assert root.tagName == "language"

    lang_file = LangFile(root.getAttribute("id"),
                         root.getAttribute("name"),
                         root.getAttribute("_name"),
                         root.getAttribute("section"),
                         root.getAttribute("_section"),
                         root.getAttribute("mimetypes"),
                         filename)

    node = first_child(root)
    assert node is not None

    while node is not None:
        if node.tagName == "escape-char":
            lang_file.set_esc_char(node.firstChild.nodeValue)
        else:
            ctx = parseTag(node)
            if ctx is not None:
                lang_file.add_context(ctx)
        node = next_sibling(node)

    return lang_file
|
||||
|
||||
if __name__ == '__main__':
    # Command-line entry point: convert the lang file named by the first
    # argument and write the result to stdout.
    import sys

    if not sys.argv[1:]:
        # The usage text goes to stderr so it never ends up in redirected
        # output.  sys.exit replaces the original os.exit, which does not
        # exist and raised AttributeError.
        sys.stderr.write("usage: %s LANG_FILE\n" % (sys.argv[0],))
        sys.exit(1)

    lang_file = parse_file(sys.argv[1])
    sys.stdout.write(lang_file.format())
|
|
@ -0,0 +1,17 @@
|
|||
#!/bin/sh

# For every listed language: convert NAME.lang to NAME.new.lang with
# ./convert.py, validate the result with ./check.sh, then move it over the
# original file. The script stops with exit status 1 at the first failure.

files="ada changelog csharp css desktop diff fortran gtkrc \
haskell idl ini java lua makefile msil nemerle octave pascal perl \
php po python R ruby scheme sh sql tcl texinfo vbnet verilog vhdl"

# Report the lang file that failed and abort the whole run.
fail() {
    echo "*** Error: " "$1"
    exit 1
}

for file in $files; do
    ./convert.py "$file.lang" > "$file.new.lang" || fail "$file.lang"
    ./check.sh "$file.new.lang" || fail "$file.lang"
    mv "$file.new.lang" "$file.lang"
done
|
Loading…
Reference in New Issue