ModIndexer/Program.cs
SmallJoker a7eaf2398f Huge rewrite
Clean up the mess and use enums
Move helper functions to Misc.cs
2017-02-25 23:48:28 +01:00

565 lines
14 KiB
C#

using System;
using System.Net;
using System.Net.Sockets;
using System.Threading;
using System.Collections.Generic;
using HtmlAgilityPack;
using Newtonsoft.Json;
namespace ModIndexer
{
/// <summary>
/// Console front end: asks which forum section to index and which page range
/// to walk, then hands the work over to <see cref="Engine"/>.
/// </summary>
class Program
{
    /// <summary>
    /// Shows the mode menu, reads the page range and runs one indexing pass.
    /// </summary>
    /// <param name="args">Command line arguments (not used).</param>
    static void Main(string[] args)
    {
        Console.Title = "Minetest Mod Indexer - Let me index the stuff for you.";
        Console.Write("Select a mode:\n" +
            "\n\t1) Released mods [full]" +
            "\n\t2) WIP mods [full]" +
            "\n\t3) Released games [full]" +
            "\n\t4) WIP games [full]" +
            "\n\t5) Old mods [fast]" +
            "\n\t6) Exit" +
            "\n\nYour choice: ");

        // Keep reading (without echo) until one of the keys 1..6 is hit;
        // key 6 leaves the program immediately.
        ConsoleKeyInfo pressed;
        do {
            pressed = Console.ReadKey(true);
            if (pressed.Key == ConsoleKey.D6)
                return;
        } while (pressed.Key < ConsoleKey.D1 || pressed.Key > ConsoleKey.D6);

        Console.WriteLine(pressed.KeyChar + "\n");

        // Menu entries 1..5 in the order they are printed above
        Misc.FETCH_TYPE[] modes = {
            Misc.FETCH_TYPE.REL_MODS,
            Misc.FETCH_TYPE.WIP_MODS,
            Misc.FETCH_TYPE.REL_GAMES,
            Misc.FETCH_TYPE.WIP_GAMES,
            Misc.FETCH_TYPE.OLD_MODS
        };

        int choice = pressed.Key - ConsoleKey.D1;
        if (choice < 0 || choice >= modes.Length) {
            Console.WriteLine("Error: Keys are not properly implemented!");
            return;
        }
        Misc.FETCH_TYPE forum = modes[choice];

        Console.Write("Start page: ");
        string first_page = Console.ReadLine();
        Console.Write("End page: ");
        string last_page = Console.ReadLine();

        // The Engine constructor performs the complete run
        new Engine(forum, first_page, last_page);

        Console.WriteLine("=== DONE!\nPress any key to exit.");
        Console.ReadKey(false);
    }
}
class Engine
{
// Encoding used to turn downloaded bytes (pages, upload answer) into text
System.Text.Encoding enc = System.Text.Encoding.UTF8;
// Topics collected by FetchTopicList; handed to Config.Upload by the constructor
List<ForumData> update_data;
// Client used by OpenPage for every page download
WebClient cli = new WebClient();
// Forum section being indexed; its integer value is sent as the "f=" URL parameter
Misc.FETCH_TYPE forum;
/// <summary>
/// Runs one complete indexing pass: fetches the requested topic list pages,
/// uploads the collected entries and prints the titles of the topics whose
/// IDs are listed in the upload answer.
/// </summary>
/// <param name="forum">Forum section to index.</param>
/// <param name="start">First page (1-based) as typed by the user; falls back to 1 when not a number.</param>
/// <param name="stop">Last page (1-based) as typed by the user; falls back to 1 when not a number.</param>
public Engine(Misc.FETCH_TYPE forum, string start, string stop)
{
    this.forum = forum;
    update_data = new List<ForumData>();
    ServicePointManager.ServerCertificateValidationCallback += ValidateRemoteCertificate;

    int first_page, last_page;
    if (!int.TryParse(start, out first_page))
        first_page = 1;
    if (!int.TryParse(stop, out last_page))
        last_page = 1;

    // FetchTopicList works with zero-based page indices
    int page = first_page - 1;
    while (page < last_page) {
        FetchTopicList(page);
        page++;
    }

    byte[] answer = Config.Upload(ref update_data);
    try {
        // The answer is parsed as a JSON array of topic IDs
        string json = enc.GetString(answer);
        int[] topics = JsonConvert.DeserializeObject<int[]>(json);
        foreach (int id in topics) {
            foreach (ForumData entry in update_data) {
                if (entry.topicId != id)
                    continue;
                Console.WriteLine("\t" + entry.title);
                break;
            }
        }
    } catch (Exception e) {
        // An unreadable answer is reported only; it does not abort the run
        Console.WriteLine(e.ToString());
    }
    Console.WriteLine("Done.");
}
/// <summary>
/// Certificate validation callback registered with ServicePointManager.
/// Policy errors other than a host name mismatch are printed to the console.
/// </summary>
/// <param name="sender">Not used.</param>
/// <param name="cert">Certificate presented by the server; only its subject is read, for the log line.</param>
/// <param name="chain">Not used.</param>
/// <param name="error">Policy errors detected for the certificate.</param>
/// <returns>Always true.</returns>
bool ValidateRemoteCertificate(object sender,
    System.Security.Cryptography.X509Certificates.X509Certificate cert,
    System.Security.Cryptography.X509Certificates.X509Chain chain,
    System.Net.Security.SslPolicyErrors error)
{
    switch (error) {
    case System.Net.Security.SslPolicyErrors.None:
    case System.Net.Security.SslPolicyErrors.RemoteCertificateNameMismatch:
        // Valid certificate, or merely a name mismatch: accepted silently
        break;
    default:
        Console.WriteLine("X509Certificate [{0}] Policy Error: '{1}'",
            cert.Subject,
            error.ToString());
        break;
    }
    // NOTE(review): every certificate is accepted, including ones with policy
    // errors, so HTTPS connections are not protected against impersonation.
    return true;
}
/// <summary>
/// Downloads a page, parses it as HTML and selects nodes from it.
/// </summary>
/// <param name="url">Address of the page to download.</param>
/// <param name="nodes">XPath expression passed to SelectNodes on the document node.</param>
/// <returns>
/// The selected nodes, or null when the document has no root node or
/// SelectNodes returned null (a message is printed in both cases).
/// </returns>
HtmlNodeCollection OpenPage(string url, string nodes)
{
    byte[] page_bytes = cli.DownloadData(url);
    HtmlDocument htmlDoc = new HtmlAgilityPack.HtmlDocument();
    htmlDoc.LoadHtml(enc.GetString(page_bytes));

    // Parse errors are only reported, never treated as fatal
    var errors = htmlDoc.ParseErrors;
    int error_count = 0;
    if (errors != null) {
        foreach (HtmlParseError unused in errors)
            error_count++;
    }
    if (error_count > 0)
        Console.WriteLine("ParseErrors: " + error_count);

    HtmlNode root = htmlDoc.DocumentNode;
    if (root == null) {
        Console.WriteLine("DocumentNode null");
        return null;
    }

    HtmlNodeCollection found = root.SelectNodes(nodes);
    if (found == null)
        Console.WriteLine("bodyNode null");
    return found;
}
/// <summary>
/// Downloads one page of the current forum's topic list, classifies every
/// topic by the tag found in its title and queues the recognised ones in
/// <c>update_data</c>. Unless the old-mods forum is being indexed, each
/// recognised topic is also opened to look for a download/source link.
/// </summary>
/// <param name="page">Zero-based page index; the request offset is <c>page * 30</c>.</param>
void FetchTopicList(int page)
{
    Console.WriteLine("=== Forum {0} ({1}) - Page {2}",
        forum.ToString(), (int)forum, page + 1);

    HtmlNodeCollection bodyNode = OpenPage(
        "https://forum.minetest.net/viewforum.php?f=" + (int)forum + "&start=" + (page * 30),
        "//ul[@class='topiclist topics']//dt/a");
    if (bodyNode == null)
        return;

    // The matched links are consumed in pairs: first the topic link
    // (title + "t=" id), then the author link (name + "u=" id).
    bool get_author = false;
    string title = "",
        author = "";
    int topicId = 0,
        authorId = 0;

    foreach (HtmlNode dtNode in bodyNode) {
        string link = dtNode.GetAttributeValue("href", "").Replace("&amp;", "&");
        string[] linkArgs = link.Split('&');

        if (!get_author) {
            topicId = ReadLinkId(linkArgs, "t=", topicId);
            title = dtNode.InnerText;
            get_author = true;
            continue;
        }

        authorId = ReadLinkId(linkArgs, "u=", authorId);
        author = dtNode.InnerText;
        get_author = false;

        // Very short titles are skipped without further analysis
        if (title.Length < 10)
            continue;

        string mod_name,
            mod_tag;
        title = parseTitle(title, out mod_name, out mod_tag);

        Misc.DATA_TYPE type = ClassifyTopic(mod_tag);
        if (type == Misc.DATA_TYPE.INVALID) {
            Console.WriteLine("INFO: Don't know where to put this mod:" +
                "\n\t(ID) Title: ({0}) {1}", topicId, title);
            continue;
        }

        string download = "";
        if (forum != Misc.FETCH_TYPE.OLD_MODS) {
            // Fetch topics, get download/source links
            // TODO: Find an use for the return value (true when a source link was found)
            FetchSingleTopic(topicId, author, mod_name, ref download);
        }

        update_data.Add(new ForumData(
            topicId,
            title.EscapeXML(),
            (int)type,
            authorId,
            author,
            download
        ));

        // Empty for next fetch
        author = "";
        title = "";
    }
}

// Returns the integer value of the last "<key>value" argument in linkArgs
// (key is a two-character prefix such as "t="). A missing or non-numeric
// value yields `fallback` instead of aborting the whole run.
static int ReadLinkId(string[] linkArgs, string key, int fallback)
{
    int id = fallback;
    foreach (string arg in linkArgs) {
        // Needs the key plus at least one character of value
        if (arg.Length < 3 || !arg.StartsWith(key, StringComparison.Ordinal))
            continue;
        int parsed;
        if (int.TryParse(arg.Substring(key.Length), out parsed))
            id = parsed;
    }
    return id;
}

// Maps the tag extracted from a topic title plus the forum being indexed to
// the data type stored for the topic; INVALID when the combination is unknown.
// NOTE(review): assumes Misc.DATA_TYPE defines WIP_MOD and REL_GAME next to
// the members already used in this file - confirm against Misc.cs.
Misc.DATA_TYPE ClassifyTopic(string mod_tag)
{
    switch (mod_tag) {
    case "mod":
    case "old mod":
        switch (forum) {
        case Misc.FETCH_TYPE.REL_MODS:
            return Misc.DATA_TYPE.REL_MOD;
        case Misc.FETCH_TYPE.WIP_MODS:
            // Was WIP_GAME, which filed WIP mods as games
            return Misc.DATA_TYPE.WIP_MOD;
        case Misc.FETCH_TYPE.OLD_MODS:
            return Misc.DATA_TYPE.OLD_MOD;
        }
        Console.WriteLine("INFO: Found a mod in the wrong place");
        break;
    case "modpack":
    case "old modpack":
        switch (forum) {
        case Misc.FETCH_TYPE.REL_MODS:
            return Misc.DATA_TYPE.REL_MP;
        case Misc.FETCH_TYPE.WIP_MODS:
            return Misc.DATA_TYPE.WIP_MP;
        case Misc.FETCH_TYPE.OLD_MODS:
            return Misc.DATA_TYPE.OLD_MOD;
        }
        Console.WriteLine("INFO: Found a modpack in the wrong place");
        break;
    case "game":
    case "subgame":
        switch (forum) {
        case Misc.FETCH_TYPE.REL_GAMES:
            // Was REL_MP, which filed released games as modpacks
            return Misc.DATA_TYPE.REL_GAME;
        case Misc.FETCH_TYPE.WIP_GAMES:
        case Misc.FETCH_TYPE.WIP_MODS:
            // Was WIP_MP, which filed WIP games as modpacks
            return Misc.DATA_TYPE.WIP_GAME;
        // TODO: Misc.FETCH_TYPE.OLD_GAMES -> Misc.DATA_TYPE.OLD_GAME
        }
        Console.WriteLine("INFO: Found a subgame in the wrong place");
        break;
    }
    return Misc.DATA_TYPE.INVALID;
}
/// <summary>
/// Opens a topic and scans the links of its first post for a source
/// repository, a direct download or a forum attachment.
/// </summary>
/// <param name="topicId">Forum topic ID, used as the "t=" URL parameter.</param>
/// <param name="author">Topic author name (currently has no influence on the result).</param>
/// <param name="mod_name">Lower-case mod name; links whose text/URL contain it are preferred.</param>
/// <param name="link">
/// Receives the chosen link: the source repository if one was found, else the
/// first direct download, else the forum attachment with the highest ID whose
/// link text contains <paramref name="mod_name"/>. Empty when nothing was found.
/// </param>
/// <returns>True when a source repository link was found.</returns>
bool FetchSingleTopic(int topicId, string author, string mod_name, ref string link)
{
    Console.WriteLine("=== Topic " + topicId);
    // Short pause before every topic request (presumably to go easy on the forum server)
    Thread.Sleep(200);
    link = "";

    HtmlNodeCollection bodyNode = OpenPage(
        "https://forum.minetest.net/viewtopic.php?t=" + topicId,
        "//div[@class='content']");
    if (bodyNode == null)
        return false;

    // Only the first content block (the opening post) is examined
    HtmlNodeCollection content = bodyNode[0].SelectNodes(".//a[@class='postlink']");
    if (content == null)
        return false;

    string download = "", source = "";
    int forum_download = 0;

    foreach (HtmlNode dtNode in content) {
        string url = dtNode.GetAttributeValue("href", "");
        string text = dtNode.InnerText;

        // A link without href cannot be classified (and must not be indexed below)
        if (url.Length == 0)
            continue;

        if (url.EndsWith(".git")) {
            source = url;
            continue;
        }
        if (url[url.Length - 1] == '/') {
            url = url.Remove(url.Length - 1);
        }

        if (url.StartsWith("./download/file.php?id=")) {
            // Forum attachment: read the decimal ID following "id="
            int pos = 23;
            int number = 0;
            while (pos < url.Length) {
                char cur = url[pos];
                if (cur < '0' || cur > '9')
                    break;
                number = number * 10 + (cur - '0');
                pos++;
            }
            string text_lower = text.ToLower().Replace('-', '_');
            if (text_lower.Contains(mod_name) &&
                    number > forum_download) {
                forum_download = number;
            }
        } else if (url.Contains(".zip") ||
                url.Contains("/zipball/") ||
                url.Contains("/tarball/") ||
                url.Contains("/archive/") ||
                url.Contains("mediafire.com/")) {
            // Direct download link
            if (url.Contains("://ompldr.org"))
                continue;

            if (url.Contains("git")) {
                int cut;
                // e.g. scheme://host/user/project/archive/file -> cut before "/archive"
                if (CountSlashes(url, out cut) == 6) {
                    source = url.Substring(0, cut);
                    // Try to find another link if it's not contained in the name.
                    // NOTE(review): the original additionally tested the author
                    // name, but only while `source` was empty, which can never be
                    // the case right after the assignment above.
                    if (source.ToLower().Replace('-', '_').Contains(mod_name))
                        break;
                }
            }
            if (download == "")
                download = url;
        } else if (url.Contains("://github.com/")
                || url.Contains("://notabug.org/")
                || url.Contains("://bitbucket.org/")) {
            if (url.Contains("/minetest/minetest") ||
                    url.Contains("/commits"))
                continue;

            int cut;
            int slashes = CountSlashes(url, out cut);
            if (slashes < 4 || slashes > 5)
                continue;

            // scheme://host/user/project[/something]: cut off the extra part
            source = slashes == 5 ? url.Substring(0, cut) : url;

            // NOTE(review): same dead author clause as above was dropped here
            if (source.ToLower().Replace('-', '_').Contains(mod_name))
                break;
        }
    }

    if (source == "" &&
            download == "" &&
            forum_download == 0)
        return false;

    link = source != "" ? source : download;
    if (link == "" && forum_download > 0)
        link = "https://forum.minetest.net/download/file.php?id=" + forum_download;

    return source != "";
}

// Counts the '/' characters in url and reports the index of the fifth one
// (0 when there are fewer than five). Uses int counters: the former byte
// counters wrapped around at 256 and never left the loop on long URLs.
static int CountSlashes(string url, out int fifth_slash)
{
    int count = 0;
    fifth_slash = 0;
    for (int i = 0; i < url.Length; i++) {
        if (url[i] != '/')
            continue;
        if (count == 4)
            fifth_slash = i;
        count++;
    }
    return count;
}
// Characters a [tag] may consist of; any other character marks the tag for removal
const string MODNAME_ALLOWED_CHARS = "abcdefghijklmnopqrstuvwxyz0123456789_-";
// Content of [tags]
string[] bad_content = { "wip", "beta" };
// Beginnings of [tags]
string[] bad_prefix = { "minetest", "mod", "mods", "git" };
// [tags] to identify a mod
string[] identifiers = { "mod", "modpack", "game" };

/// <summary>
/// Removes useless [tags] / {tags} from a forum topic title and extracts the
/// mod name and the kind of content from the remaining ones.
/// </summary>
/// <param name="title">Raw topic title.</param>
/// <param name="mod_name">
/// Receives the lower-cased content ('-' replaced by '_', bad prefixes
/// stripped) of the last tag that was kept; empty when there is none.
/// </param>
/// <param name="mod_tag">
/// Receives the first tag matching one of <c>identifiers</c>; empty when there is none.
/// </param>
/// <returns>The title without the removed tags and without leading, trailing or repeated whitespace.</returns>
string parseTitle(string title, out string mod_name, out string mod_tag)
{
    mod_tag = "";
    mod_name = "";

    string raw = title;
    int open_pos = 0;
    bool opened = false,
        delete = false;

    // `raw` shrinks while it is scanned; `pos` is always adjusted so that the
    // next iteration continues right behind the tag that was just handled.
    for (int pos = 0; pos < raw.Length; pos++) {
        char cur = raw[pos];

        if (cur == '[' || cur == '{') {
            opened = true;
            open_pos = pos;
            continue;
        }

        if (cur == ']' || cur == '}') {
            // A closing bracket without an open tag is ordinary text.
            // (It used to be matched against a stale/zero open position.)
            if (!opened)
                continue;

            // Tag closed
            opened = false;
            int len = pos - open_pos + 1;
            string content = raw.Substring(open_pos + 1, len - 2).ToLower().Trim();

            double ignored;
            bool is_number = double.TryParse(content, out ignored);
            bool is_identifier = identifiers.IndexOf(content) != -1;

            if (!is_number && mod_tag == "" && is_identifier) {
                // Mod tag detected
                mod_tag = content;
                delete = true;
            }

            if (delete || is_number || is_identifier
                    || bad_content.IndexOf(content) != -1) {
                // Remove this tag
                raw = raw.Remove(open_pos, len);
                pos -= len;
                delete = false;
                continue;
            }

            content = content.Replace('-', '_');

            // NOTE(review): heuristics kept as they were - the loop stops at the
            // first prefix the content is too short for, and "mod" is tried
            // before "mods"; confirm that this is the intended behaviour.
            int start_substr = 0;
            foreach (string prefix in bad_prefix) {
                if (content.Length <= start_substr + prefix.Length + 1)
                    break;
                if (content.Substring(start_substr, prefix.Length) == prefix)
                    start_substr += prefix.Length;
                if (content[start_substr] == '_')
                    start_substr++;
            }

            if (start_substr == 0) {
                // Everything fine, nothing to replace
                mod_name = content;
            } else {
                // Replace this tag with the proper name. Strings are immutable,
                // so the result of Remove/Insert has to be assigned back; the
                // new tag goes exactly where the old one started.
                mod_name = content.Substring(start_substr);
                string to_insert = "[" + mod_name + "]";
                raw = raw.Remove(open_pos, len).Insert(open_pos, to_insert);
                // Continue behind the closing bracket of the inserted tag
                pos = open_pos + to_insert.Length - 1;
            }
            delete = false;
        }

        if (opened && MODNAME_ALLOWED_CHARS.IndexOf(cur) == -1) {
            delete = true;
        }
    }

    // Collapse whitespace runs to their first character and drop leading whitespace
    System.Text.StringBuilder cleaned = new System.Text.StringBuilder(raw.Length);
    bool skip_space = true;
    foreach (char cur in raw) {
        bool is_space = char.IsWhiteSpace(cur);
        if (is_space && skip_space)
            continue;
        skip_space = is_space;
        cleaned.Append(cur);
    }
    // After collapsing, at most one trailing whitespace character is left
    if (cleaned.Length > 0 && char.IsWhiteSpace(cleaned[cleaned.Length - 1]))
        cleaned.Length--;

    return cleaned.ToString();
}
}
}