mt-lua-api-pyparse/parserv6.py

268 lines
9.3 KiB
Python

# -*- coding: utf-8 -*-
print("**Minetest lua_api.txt Markdown parser - mt-lua-api-pyparse - converts Markdown to HTML document with navigation**")
print("**Written by Lars Müller alias LMD in Python 3.5 - requires Python >= 3.2**")
print("**Furthermore, this script has to be in the same folder as generate_bg.py, template.html, jumbotron.css, and the images taken from Minetest Game.**")
print("**The images taken from Minetest Game are licensed under CC-BY-SA 3.0, credits go to the Minetest Artists.**")
#from cgi import html
from xml.sax.saxutils import escape, unescape
html_escape_table = {
'"': """,
"'": "'"
}
def html_escape(text):
return escape(text, html_escape_table)
import urllib.request as url
import urllib.parse as parse
print("**Generating underground Minetest scene...**")
exec(open("generate_bg.py","r").read()) # IMPORTANT - GENERATES THE BEAUTIFUL BACKGROUND !
print("**...finished generating scene.**")
import math
# This python script grabs the newest lua_api.txt from Minetest GitHub repo and converts it to HTML, plus adding some bookmarks & css
# So mainly MD -> HTML. Written by me to improve my rusty Python skills.
# © Lars Müller @appguru.eu
print("**Grabbing newest lua_api.txt, make sure you have an internet connection...**")
link = "https://raw.githubusercontent.com/minetest/minetest/master/doc/lua_api.txt" # Grab newest lua_api.txt
f = url.urlopen(link)
markdown = parse.unquote(str(f.read().decode("ascii","ignore"))) # Read & convert
print("**...grabbed newest lua_api.txt from official Minetest repository.**")
liste=0 # Which sublist we are in right NOW
headers=[] # Stores all the headers + IDs
ID=0 # Stores header ID counter
print("**Starting parsing...**")
def parse_markdown(string,parent=False): # PARSES A SINGLE LINE !
global liste
global ID
global headers
suffix=""
prefix=""
if string.find("*") != -1 and (string[0:string.find("*")].count(" ") == string.find("*")) and not (parent or string[string.find("*")+1]=="*"): # LISTS
prevliste=liste
liste=1+int(string.find("*")/3)
if (liste > prevliste):
for i in range(0,liste-prevliste):
prefix+="<ul>"
elif (liste < prevliste):
for i in range(0,prevliste-liste):
prefix+="</ul>"
return prefix+"<li>"+parse_markdown(string[string.find("*")+2:],parent=True)+"</li>"+suffix
if not parent and liste != 0 and string=="":
for i in range(0,liste):
prefix+="</ul>"
liste=0
if (len(string)) == 0:
return prefix+"<br>"
if (string[-2:]==" "):
return prefix+parse_markdown(string[:-2],parent=True)+"<br>"
if (string[0]=="#"):
space=string.find(" ")
c=string[0:space-1].count("#")
if space==-1 or string[space+1:].count(" ")==len(string)-space-1:
return "<br>"
if (space-1==c):
ID+=1
c+=1
temp="<h"+str(c)+'>'+parse_markdown(string[space+1:],parent=True)+"</h"+str(c)+">"
headers.append((temp,str(ID)))
temp=prefix+temp[:3]+' id="gheader'+str(ID)+'"'+temp[3:]
return temp
bold=False
boldamount=string.count("**")
ba=0
italic=False
code=False
link=False
link2=False
codeamount=string.count("`")
ca=0
startindex=0
tags=[]
currentstring=""
index=-1
while index in range(-1,len(string)-1):
index+=1
appendtag=False
c=string[index]
if c == "`":
if ca < codeamount:
code=not code
ca=ca+1
if not code: # We have just closed a code fragment
tags.append((string[startindex+1:index],"code"))
continue
else: # A new one starts : SAVE INDEX + SAVE CURRENT STRING !
appendtag=True
elif not code:
if c == "*" and len(string) > index+1 and string[index+1] == "*":
if ba < boldamount:
index+=1
bold=not bold
ba=ba+1
if not bold: # We have just closed a code fragment
tags.append((string[startindex+1:index-1],"bold"))
continue
else: # A new one starts : SAVE INDEX + SAVE CURRENT STRING !
appendtag=True
elif c == "<" and not link:
appendtag=True
link=True
elif c == ">" and link:
link=False
tags.append((string[startindex+1:index],"link"))
continue
elif c == "[":
breakit=False
text=""
for i in range(index+2,len(string)-3):
c2=string[i]
if (c2 == "]"):
text=string[index+1:i]
if string[i+1]=="(":
for j in range(i+3,len(string)):
c3=string[j]
if (c3 == ")"):
breakit=True
tags.append((text,"link",string[i+2:j]))
index=j+1
break
if breakit:
continue
if appendtag:
tags.append((currentstring,"normal"))
currentstring=""
startindex=index
continue
if not bold and not code and not link and not link2:
currentstring+=c
if len(currentstring) != 0:
tags.append((currentstring,"normal"))
result=""
for tag in tags:
string=tag[0]
p=""
s=""
if tag[1]=="code":
p,s="<code>","</code>"
elif tag[1]=="bold":
p,s="<b>","</b>"
elif tag[1]=="link":
if len(tag) == 2:
if tag[0][0:4] == "http": # CHECK LINKS !
p,s='<a href="'+tag[0]+'">',"</a>"
else:
p,s='<a href="'+tag[2]+'">',"</a>"
elif tag[1]=="italic":
p,s="<em>","</em>"
result+=p+html_escape(string)+s
return prefix+"<p>"+result+"</p>"
def parse_md(string): # Parse line by line
lines=string.split("\n")
ret=""
for i in range(len(lines)-1,0,-1): # Convert alternate header writings(underlines)
if abs(len(lines[i-1])-len(lines[i])) < 3:
if lines[i].count("=")==len(lines[i]):
lines[i]=""
lines[i-1]="# "+lines[i-1]
elif lines[i].count("-")==len(lines[i]):
lines[i]=""
lines[i-1]="## "+lines[i-1]
i=0
ident=False
segments=0
for line in lines:
prefix=""
suffix=""
asteriskpos=line.find("*")
# or (len(line) > 1 and line[0]=="\t" and (asteriskpos==-1 or line[0:asteriskpos].count("\t") != asteriskpos)))
if liste== 0 and ((len(line) > 4 and line[0:4]==" "*4 and (asteriskpos==-1 or asteriskpos > 1 or line[0:asteriskpos].count(" ") != asteriskpos))):
if not ident:
prefix="<pre><code>"
#print("START : "+line[4:])
ident=True
elif ident:
ident=False
prefix="</code></pre>"
segments+=1
#else:
#if (len(line > 4)
#print("{"+line[0:4]+";"+line[0]+"}")
lval=""
if ident:
lval=html_escape(line[4:])+"\n"
else:
lval=parse_markdown(line)
ret+=prefix+lval
i=i+1
print("**Found "+str(segments)+" multi-line code segments.**")
if ident:
ident=False
return ret+"</code></pre>"
return ret
def code(): # Parse multi-line code fragments
global markdown
last=-1
i=0
stuff=[]
while (i < len(markdown)):
if markdown[i:i+3]=="`"*3: # Handle GitHub style code tags
i=i+3
if last < 0:
start=-(last+1)
last=i
stuff.append((markdown[start:last-3],False))
else:
stuff.append((markdown[last:i-3],True))
last=-i-1
i=i+1
start=-(last+1)
stuff.append((markdown[start:],False))
#print(stuff)
markdown=""
for s in stuff:
if s[1]:
markdown+="<pre><code>"+s[0]+"</code></pre>"
else:
markdown+=parse_md(s[0])
code()
print("**...finished parsing.**")
nav=""
print("**Creating content table...**")
for header in headers:
nav+="""<li><a class="nav-link" href="#gheader"""+header[1]+"""">"""+header[0]+"""</a></li>""" # Create navbar
print("**...finished creating content table. "+str(len(headers))+" Headers are included.**")
# FINAL - THE FINAL HTML, BOOTSTRAP BASED DOCUMENT OUR HTML IS INSERTED IN
final = open('template.html', 'r').read()
print("**Inserting content into template file...**")
markdown=final.replace("<!--PLACESTUFF-->",markdown)
print("**...finished inserting content.**")
print("**Inserting content table into template file...**")
markdown=markdown.replace("<!--PLACENAV-->",nav)
print("**...finished inserting content table.**")
print("**Saving as lua_api.html...**")
file = open('lua_api.html', 'w') # SAVE AS lua_api.html
file.write(markdown)
print("**...saved.**")
print("**Parser finished successfully and lua_api.html was generated.**")
file.close()