#!/usr/bin/env python

import os
import sys
import re
from lxml import etree
from optparse import OptionParser

def normalize_text(s):
    r"""
    Normalizes whitespace in text.

    >>> normalize_text("asd xxx")
    'asd xxx'
    >>> normalize_text(" asd\nxxx  ")
    'asd xxx'
    """
    return s.replace("\n", " ").strip()

CXX_NAMESPACE_RE = re.compile(r'[_a-zA-Z][_0-9a-zA-Z]*::')
def fix_definition(s):
    """
    Removes C++ name qualifications from some definitions.

    For example:

    >>> fix_definition("bool flag")
    'bool flag'
    >>> fix_definition("bool FooBar::flag")
    'bool flag'
    >>> fix_definition("void(* _GeanyObjectClass::project_open) (GKeyFile *keyfile)")
    'void(* project_open) (GKeyFile *keyfile)'

    """
    return CXX_NAMESPACE_RE.sub(r"", s);

class AtAt(object):

    def __init__(self):
        self.retval = None
        self.since = ""
        self.annot = []

    def cb(type, str):
        return "@%s %s" % (type, str)

class AtDoc(object):
    def __init__(self):
        self.retval = None
        self.since = ""
        self.annot = []

    def cb(self, type, str):
        if (type == "param"):
            words = str.split(" ", 2);
            self.annot = []
        elif (type == "return"):
            self.annot = []
        elif (type == "since"):
            self.since = str.rstrip()
        elif (type == "geany:skip"):
            self.annot.append("skip")
        elif (type == "geany:nullable") or (type == "geany:skip"):
            self.annot.append(type.split(":")[1])
        elif (type == "geany:cb"):
            self.annot.append("scope notified")
        elif (type == "geany:cbdata"):
            self.annot.append("closure")
        elif (type == "geany:cbfree"):
            self.annot.append("destroy")
        elif (type == "geany:transfer") or (type == "geany:element-type") or (type == "geany:scope"):
            type = type.split(":")[1]
            self.annot.append("%s %s" % (type, str))
        elif (type ==  "see"):
            return "See " + str
        elif (type ==  "a"):
            if (str != "NULL"): # FIXME: some of geany does @a NULL
                return "@" + str
            else:
                return str
        else:
            return str

        return ""

class At(object):
    def __init__(self, cb):
        self.cb = cb

class DoxygenProcess(object):
    def __init__(self):
        self.at = None

    # http://stackoverflow.com/questions/4624062/get-all-text-inside-a-tag-in-lxml
    @staticmethod
    def stringify_children(node):
        from lxml.etree import tostring
        from itertools import chain
        parts = ([node.text] +
                list(chain(*([c.text, tostring(c).decode("utf-8"), c.tail] for c in node.getchildren()))) +
                [node.tail])
        # filter removes possible Nones in texts and tails
        return "".join(filter(None, parts))

    def get_program_listing(self, xml):
        from lxml.etree import tostring
        arr = ["", "|[<!-- language=\"C\" -->"]
        for l in xml.getchildren():
            if (l.tag == "codeline"):
                # a codeline is of the form
                # <highlight class="normal">GeanyDocument<sp/>*doc<sp/>=<sp/>...;</highlight>
                # <sp/> tags must be replaced with spaces, then just use the text
                h = l.find("highlight")
                if h is not None:
                    html = tostring(h).decode("utf-8")
                    html = html.replace("<sp/>", " ")
                    arr.append("  " + tostring(etree.HTML(html), method="text").decode("utf-8"))
        arr.append("]|")
        return "\n".join(arr)

    def join_annot(self):
        s = " ".join(map(lambda x: "(%s)" % x, self.at.annot))
        return s + ": " if s else ""

    def process_element(self, xml):
        self.at = AtDoc()
        s = self.__process_element(xml)
        return s

    def get_extra(self):
        return self.join_annot()

    def get_return(self):
        return self.at.retval

    def get_since(self):
        return self.at.since

    def __process_element(self, xml):
        s = ""

        if xml.text:
            s += xml.text
        for n in xml.getchildren():
            if n.tag == "emphasis":
                s += self.at.cb("a", self.__process_element(n))
            if n.tag == "computeroutput":
                s += self.at.cb("c", self.__process_element(n))
            if n.tag == "itemizedlist":
                s += "\n" + self.__process_element(n)
            if n.tag == "listitem":
                s += " - " + self.__process_element(n)
            if n.tag == "para":
                s += self.__process_element(n) + "\n"
            if n.tag == "ref":
                s += n.text if n.text else ""
            if n.tag == "simplesect":
                ss = self.at.cb(n.get("kind"), self.__process_element(n))
                s += ss if ss + "\n" else ""
            if n.tag == "programlisting":
                s += self.get_program_listing(n)
            if n.tag == "xrefsect":
                s += self.__process_element(n)
            if n.tag == "xreftitle":
                s += self.__process_element(n) + ": "
            if n.tag == "xrefdescription":
                s += self.__process_element(n)
            if n.tag == "ulink":
                s += self.__process_element(n)
            if n.tag == "linebreak":
                s += "\n"
            if n.tag == "ndash":
                s += "--"
                # workaround for doxygen bug #646002
            if n.tag == "htmlonly":
                s += ""
            if n.tail:
                s += n.tail
            if n.tag.startswith("param"):
                pass # parameters are handled separately in DoxyFunction::from_memberdef()
        return s

class DoxyMember(object):
    def __init__(self, name, brief, extra = ""):
        self.name       = name
        self.brief      = brief
        self.extra      = extra


class DoxyElement(object):

    def __init__(self, name, definition, **kwargs):
        self.name       = name
        self.definition = definition
        self.brief      = kwargs.get('brief', "")
        self.detail     = kwargs.get('detail', "")
        self.members    = kwargs.get('members', [])
        self.since      = kwargs.get('since', "")
        self.extra      = kwargs.get('extra', "")
        self.retval     = kwargs.get('retval', None)

    def is_documented(self):
        if (normalize_text(self.brief)) != "":
            return True
        return False

    def add_brief(self, xml):
        proc = DoxygenProcess()
        self.brief = proc.process_element(xml)
        self.extra += proc.get_extra()

    def add_detail(self, xml):
        proc = DoxygenProcess()
        self.detail = proc.process_element(xml)
        self.extra += proc.get_extra()
        self.since = proc.get_since()

    def add_member(self, xml):
        name = xml.find("name").text
        proc = DoxygenProcess()
        brief = proc.process_element(xml.find("briefdescription"))
        # optional doxygen command output appears within <detaileddescription />
        proc.process_element(xml.find("detaileddescription"))
        self.members.append(DoxyMember(name, normalize_text(brief), proc.get_extra()))

    def add_param(self, xml):
        name = xml.find("parameternamelist").find("parametername").text
        proc = DoxygenProcess()
        brief = proc.process_element(xml.find("parameterdescription"))
        self.members.append(DoxyMember(name, normalize_text(brief), proc.get_extra()))

    def add_return(self, xml):
        proc = DoxygenProcess()
        brief = proc.process_element(xml)
        self.retval = DoxyMember("ret", normalize_text(brief), proc.get_extra())

    def to_gtkdoc(self):
        s  = []
        s.append("/**")
        s.append(" * %s: %s" % (self.name, self.extra))
        for p in self.members:
            s.append(" * @%s: %s %s" % (p.name, p.extra, p.brief))
        s.append(" *")
        s.append(" * %s" % self.brief.replace("\n", "\n * "))
        s.append(" *")
        s.append(" * %s" % self.detail.replace("\n", "\n * "))
        s.append(" *")
        if self.retval:
            s.append(" * Returns: %s %s" % (self.retval.extra, self.retval.brief))
        if self.since:
            s.append(" *")
            s.append(" * Since: %s" % self.since)
        s.append(" */")
        s.append("")
        return "\n".join(s)

class DoxyTypedef(DoxyElement):

    @staticmethod
    def from_memberdef(xml):
        name = xml.find("name").text
        d = normalize_text(xml.find("definition").text).replace("G_BEGIN_DECLS", "")
        d += ";"
        return DoxyTypedef(name, d)

class DoxyEnum(DoxyElement):

    @staticmethod
    def from_memberdef(xml):
        name = xml.find("name").text
        d = "typedef enum {\n"
        for member in xml.findall("enumvalue"):
            v = member.find("initializer")
            d += "\t%s%s,\n" % ( member.find("name").text, " "+v.text if v is not None else "")
        d += "} %s;\n" % name

        e = DoxyEnum(name, d)
        e.add_brief(xml.find("briefdescription"))
        for p in xml.findall("enumvalue"):
            e.add_member(p)
        return e

class DoxyStruct(DoxyElement):

    @staticmethod
    def from_compounddef(xml, typedefs = []):
        name = xml.find("compoundname").text
        section = xml.find("sectiondef")
        d = "struct %s {\n" % name;
        for p in section.findall("memberdef"):
            # workaround for struct members. g-ir-scanner can't properly map struct members
            # (beginning with struct GeanyFoo) to the typedef and assigns a generic type for them
            # thus we fix that up here and enforce usage of the typedef. These are written
            # out first, before any struct definition, for this reason
            # Exception: there are no typedefs for GeanyFooPrivate so skip those. Their exact
            # type isn't needed anyway
            s = fix_definition(p.find("definition").text).lstrip()
            words = s.split()
            if (words[0] == "struct"):
                if not (words[1].endswith("Private") or words[1].endswith("Private*")):
                    s = " ".join(words[1:])
            d += "\t%s;\n"  % s

        d += "};\n"
        e = DoxyStruct(name, d)
        e.add_brief(xml.find("briefdescription"))
        for p in section.findall("memberdef"):
            e.add_member(p)
        return e

class DoxyFunction(DoxyElement):

    @staticmethod
    def from_memberdef(xml):
        name = xml.find("name").text
        d = normalize_text(xml.find("definition").text.replace("G_BEGIN_DECLS", ""))
        d += " " + xml.find("argsstring").text + ";"
        d = normalize_text(d.replace("GEANY_API_SYMBOL", ""))

        e = DoxyFunction(name, d)
        e.add_brief(xml.find("briefdescription"))
        e.add_detail(xml.find("detaileddescription"))
        for p in xml.xpath(".//detaileddescription/*/parameterlist[@kind='param']/parameteritem"):
            e.add_param(p)
        x = xml.xpath(".//detaileddescription/*/simplesect[@kind='return']")
        if (len(x) > 0):
            e.add_return(x[0])
        return e

def main(args):

    xml_dir = None
    outfile = None
    scioutfile = None

    parser = OptionParser(usage="usage: %prog [options] XML_DIR")
    parser.add_option("--xmldir", metavar="DIRECTORY", help="Path to Doxygen-generated XML files",
        action="store", dest="xml_dir")
    parser.add_option("-d", "--outdir", metavar="DIRECTORY", help="Path to Doxygen-generated XML files",
        action="store", dest="outdir", default=".")
    parser.add_option("-o", "--output", metavar="FILE", help="Write output to FILE",
        action="store", dest="outfile")
    parser.add_option("--sci-output", metavar="FILE", help="Write scintilla_object_* output to FILE",
        action="store", dest="scioutfile")
    opts, args = parser.parse_args(args[1:])

    xml_dir = args[0]
    if (opts.outfile):
        outfile = open(opts.outfile, "w+")
    else:
        outfile=sys.stdout

    if (opts.scioutfile):
        scioutfile = open(opts.scioutfile, "w+")
    else:
        scioutfile = outfile

    if (outfile is None):
        sys.stderr.write("no output file\n")
        return 1

    if not (os.path.exists(xml_dir)):
        sys.stderr.write("invalid xml directory\n")
        return 1

    transform = etree.XSLT(etree.parse(os.path.join(xml_dir, "combine.xslt")))
    doc = etree.parse(os.path.join(xml_dir, "index.xml"))
    root = transform(doc)

    other = []
    typedefs = []

    c_files = root.xpath(".//compounddef[@kind='file']/compoundname[substring(.,string-length(.)-1)='.c']/..")
    h_files = root.xpath(".//compounddef[@kind='file']/compoundname[substring(.,string-length(.)-1)='.h']/..")

    for f in h_files:
        if not (f.find("compoundname").text.endswith("private.h")):
            for n0 in f.xpath(".//*/memberdef[@kind='typedef' and @prot='public']"):
                if not (n0.find("type").text.replace("G_BEGIN_DECLS", "").lstrip().startswith("enum")):
                    e = DoxyTypedef.from_memberdef(n0)
                    typedefs.append(e)

        for n0 in f.xpath(".//*/memberdef[@kind='enum' and @prot='public']"):
            e = DoxyEnum.from_memberdef(n0)
            other.append(e)

    for n0 in root.xpath(".//compounddef[@kind='struct' and @prot='public']"):
        e = DoxyStruct.from_compounddef(n0)
        other.append(e)

    for f in c_files:
        for n0 in f.xpath(".//*/memberdef[@kind='function' and @prot='public']"):
            e = DoxyFunction.from_memberdef(n0)
            other.append(e)

    outfile.write("#include <glib.h>\n")
    outfile.write("#include <gtk/gtk.h>\n")
    outfile.write("typedef struct _ScintillaObject ScintillaObject;\n")
    outfile.write("typedef struct TMSourceFile TMSourceFile;\n")
    outfile.write("typedef struct TMWorkspace TMWorkspace;\n")

    # write typedefs first, they are possibly undocumented but still required (even
    # if they are documented, they must be written out without gtkdoc)
    for e in typedefs:
        outfile.write(e.definition)
        outfile.write("\n\n")

    for e in filter(lambda x: x.is_documented(), other):
        outfile.write("\n\n")
        outfile.write(e.to_gtkdoc())
        outfile.write(e.definition)
        outfile.write("\n\n")
        if (e.name.startswith("sci_")):
            scioutfile.write(e.to_gtkdoc().replace("sci_", "scintilla_object_"))
            scioutfile.write(e.definition.replace("sci_", "scintilla_object_"))
            scioutfile.write("\n\n")

    return 0

if __name__ == "__main__":
    sys.exit(main(sys.argv))