using System;
using System.Net;
using System.Net.Sockets;
using System.Threading;
using System.Collections.Generic;
using HtmlAgilityPack;
using Newtonsoft.Json;

namespace ModIndexer
{
    /// <summary>
    /// Console front end: asks which forum section and which page range to
    /// index, then hands the work over to <see cref="Engine"/>.
    /// </summary>
    class Program
    {
        static void Main(string[] args)
        {
            Console.Title = "Minetest Mod Indexer - Let me index the stuff for you.";
            Console.Write("Select a mode:\n" +
                "\n\t1) Released mods [full]" +
                "\n\t2) WIP mods [full]" +
                "\n\t3) Released games [full]" +
                "\n\t4) WIP games [full]" +
                "\n\t5) Old mods [fast]" +
                "\n\t6) Exit" +
                "\n\nYour choice: ");

            // Wait until one of the keys 1..6 is pressed; 6 exits right away
            ConsoleKeyInfo k = new ConsoleKeyInfo();
            while (k.Key < ConsoleKey.D1 || k.Key > ConsoleKey.D6)
            {
                k = Console.ReadKey(true);
                if (k.Key == ConsoleKey.D6)
                    return;
            }
            Console.WriteLine(k.KeyChar + "\n");

            Misc.FETCH_TYPE forum;
            switch (k.Key)
            {
                case ConsoleKey.D1:
                    forum = Misc.FETCH_TYPE.REL_MODS;
                    break;
                case ConsoleKey.D2:
                    forum = Misc.FETCH_TYPE.WIP_MODS;
                    break;
                case ConsoleKey.D3:
                    forum = Misc.FETCH_TYPE.REL_GAMES;
                    break;
                case ConsoleKey.D4:
                    forum = Misc.FETCH_TYPE.WIP_GAMES;
                    break;
                case ConsoleKey.D5:
                    forum = Misc.FETCH_TYPE.OLD_MODS;
                    break;
                default:
                    Console.WriteLine("Error: Keys are not properly implemented!");
                    return;
            }

            Console.Write("Start page: ");
            string start = Console.ReadLine();
            Console.Write("End page: ");
            string stop = Console.ReadLine();

            // The Engine constructor performs the complete fetch + upload run
            new Engine(forum, start, stop);

            Console.WriteLine("=== DONE!\nPress any key to exit.");
            Console.ReadKey(false);
        }
    }

    /// <summary>
    /// Walks the topic list pages of one forum section, derives the mod name,
    /// type and download/source link of every topic and uploads the collected
    /// records through <c>Config.Upload</c>.
    /// </summary>
    class Engine
    {
        System.Text.Encoding enc = System.Text.Encoding.UTF8;
        List<ForumData> update_data;
        WebClient cli = new WebClient();
        Misc.FETCH_TYPE forum;

        /// <summary>
        /// Runs the whole indexing job: fetches the pages
        /// <paramref name="start"/>..<paramref name="stop"/> (1-based,
        /// inclusive), uploads the result and prints the titles of the topics
        /// whose IDs are contained in the upload answer.
        /// </summary>
        /// <param name="forum">Forum section to index.</param>
        /// <param name="start">First page as text; falls back to 1 if it is not a number or below 1.</param>
        /// <param name="stop">Last page as text; falls back to 1 if it is not a number.</param>
        public Engine(Misc.FETCH_TYPE forum, string start, string stop)
        {
            this.forum = forum;
            update_data = new List<ForumData>();

            ServicePointManager.ServerCertificateValidationCallback += ValidateRemoteCertificate;

            int begin, end;
            // A page below 1 would result in a negative "start" offset in the URL
            if (!int.TryParse(start, out begin) || begin < 1)
                begin = 1;
            if (!int.TryParse(stop, out end))
                end = 1;

            for (int i = begin - 1; i < end; i++)
                FetchTopicList(i);

            byte[] answer = Config.Upload(ref update_data);

            try
            {
                // The answer is parsed as a JSON array of topic IDs
                int[] topics = JsonConvert.DeserializeObject<int[]>(enc.GetString(answer));
                for (int i = 0; i < topics.Length; i++)
                {
                    foreach (ForumData d in update_data)
                    {
                        if (d.topicId == topics[i])
                        {
                            Console.WriteLine("\t" + d.title);
                            break;
                        }
                    }
                }
            }
            catch (Exception e)
            {
                Console.WriteLine(e.ToString());
                //Console.WriteLine(enc.GetString(answer));
            }
            Console.WriteLine("Done.");
        }

        /// <summary>
        /// Certificate validation callback. Logs policy errors other than a
        /// name mismatch.
        /// </summary>
        /// <returns>Always <c>true</c>.</returns>
        bool ValidateRemoteCertificate(object sender,
            System.Security.Cryptography.X509Certificates.X509Certificate cert,
            System.Security.Cryptography.X509Certificates.X509Chain chain,
            System.Net.Security.SslPolicyErrors error)
        {
            // Nothing to report for valid certificates and for name mismatches
            if (error == System.Net.Security.SslPolicyErrors.None
                    || error == System.Net.Security.SslPolicyErrors.RemoteCertificateNameMismatch)
                return true;

            Console.WriteLine("X509Certificate [{0}] Policy Error: '{1}'",
                cert.Subject, error.ToString());

            // NOTE(review): invalid certificates are only logged, never rejected.
            // This effectively disables TLS validation - confirm that it is intended.
            return true;
        }

        /// <summary>
        /// Downloads <paramref name="url"/>, parses it as HTML and selects the
        /// nodes matching the XPath expression <paramref name="nodes"/>.
        /// </summary>
        /// <returns>
        /// The matching nodes, or <c>null</c> when the download failed, the
        /// document has no root node or nothing matched.
        /// </returns>
        HtmlNodeCollection OpenPage(string url, string nodes)
        {
            string text;
            try
            {
                text = enc.GetString(cli.DownloadData(url));
            }
            catch (WebException e)
            {
                // One unreachable page must not discard everything collected so far
                Console.WriteLine("Download failed: " + e.Message);
                return null;
            }

            HtmlDocument htmlDoc = new HtmlAgilityPack.HtmlDocument();
            htmlDoc.LoadHtml(text);

            if (htmlDoc.ParseErrors != null)
            {
                int count = 0;
                foreach (HtmlParseError er in htmlDoc.ParseErrors)
                    count++;

                if (count > 0)
                    Console.WriteLine("ParseErrors: " + count);
            }

            // If valid html page
            if (htmlDoc.DocumentNode == null)
            {
                Console.WriteLine("DocumentNode null");
                return null;
            }

            HtmlNodeCollection bodyNode = htmlDoc.DocumentNode.SelectNodes(nodes);
            if (bodyNode == null)
            {
                Console.WriteLine("bodyNode null");
                return null;
            }
            return bodyNode;
        }

        /// <summary>
        /// Analyzes one page of the topic list of the selected forum and adds
        /// a record to <c>update_data</c> for every topic that could be
        /// classified.
        /// </summary>
        /// <param name="page">Zero-based page index; 30 topics per page.</param>
        void FetchTopicList(int page)
        {
            Console.WriteLine("=== Forum {0} ({1}) - Page {2}",
                forum.ToString(), (int)forum, page + 1);

            HtmlNodeCollection bodyNode = OpenPage(
                "https://forum.minetest.net/viewforum.php?f=" + (int)forum + "&start=" + (page * 30),
                "//ul[@class='topiclist topics']//dt/a");
            if (bodyNode == null)
                return;

            // The matched anchors are consumed in pairs: topic link, then author link
            bool get_author = false;
            string title = "", author = "";
            int topicId = 0, authorId = 0;

            foreach (HtmlNode dtNode in bodyNode)
            {
                // Remember the role of this node and flip right away, so that
                // every exit path below leaves the toggle in the correct state
                bool is_author_node = get_author;
                get_author = !get_author;

                // Entities must be decoded first: the arguments are separated
                // by "&" only after the entity-encoded ampersands are resolved
                string link = WebUtility.HtmlDecode(dtNode.GetAttributeValue("href", ""));
                string[] linkArgs = link.Split('&');

                if (!is_author_node)
                {
                    topicId = ReadLinkId(linkArgs, "t=", topicId);
                    title = dtNode.InnerText;
                    continue;
                }

                authorId = ReadLinkId(linkArgs, "u=", authorId);
                author = dtNode.InnerText;

                if (title.Length < 10)
                    continue;

                string mod_name, mod_tag;
                title = parseTitle(title, out mod_name, out mod_tag);

                Misc.DATA_TYPE type = GetDataType(mod_tag);
                if (type == Misc.DATA_TYPE.INVALID)
                {
                    Console.WriteLine("INFO: Don't know where to put this mod:" +
                        "\n\t(ID) Title: ({0}) {1}", topicId, title);
                    continue;
                }

                string download = "";
                if (forum != Misc.FETCH_TYPE.OLD_MODS)
                {
                    // Fetch topics, get download/source links
                    // TODO: Find an use for the returned "is git" flag
                    FetchSingleTopic(topicId, author, mod_name, ref download);
                }

                /*Console.WriteLine("Found mod: " + mod_name +
                    "\n\tTag: " + mod_tag +
                    "\n\tLink: " + download +
                    "\n\tType: " + (int)type + " " + type.ToString());*/

                update_data.Add(new ForumData(
                    topicId,
                    title.EscapeXML(),
                    (int)type,
                    authorId,
                    author,
                    download
                ));

                // Empty for next fetch
                author = "";
                title = "";
            }
        }

        /// <summary>
        /// Searches the link arguments for one starting with
        /// <paramref name="key"/> (e.g. "t=") and parses the number after it.
        /// The last matching argument wins.
        /// </summary>
        /// <returns>The parsed ID, or <paramref name="fallback"/> when no argument matched.</returns>
        static int ReadLinkId(string[] linkArgs, string key, int fallback)
        {
            int id = fallback;
            foreach (string arg in linkArgs)
            {
                // Needs at least one character after the two key characters
                if (arg.Length < 3)
                    continue;

                if (arg.StartsWith(key, StringComparison.Ordinal))
                    id = int.Parse(arg.Remove(0, 2));
            }
            return id;
        }

        /// <summary>
        /// Maps the tag found in a topic title to a data type, depending on
        /// the forum section which is being indexed.
        /// </summary>
        /// <returns><c>Misc.DATA_TYPE.INVALID</c> if the tag is unknown or does not fit the forum.</returns>
        Misc.DATA_TYPE GetDataType(string mod_tag)
        {
            Misc.DATA_TYPE type = Misc.DATA_TYPE.INVALID;

            switch (mod_tag)
            {
                case "mod":
                case "old mod":
                    switch (forum)
                    {
                        case Misc.FETCH_TYPE.REL_MODS:
                            type = Misc.DATA_TYPE.REL_MOD;
                            break;
                        case Misc.FETCH_TYPE.WIP_MODS:
                            // Used to be WIP_GAME, which filed WIP mods as games.
                            // NOTE(review): WIP_MOD is assumed to exist next to
                            // REL_MOD / WIP_MP - confirm against Misc.DATA_TYPE.
                            type = Misc.DATA_TYPE.WIP_MOD;
                            break;
                        case Misc.FETCH_TYPE.OLD_MODS:
                            type = Misc.DATA_TYPE.OLD_MOD;
                            break;
                        default:
                            Console.WriteLine("INFO: Found a mod in the wrong place");
                            break;
                    }
                    break;
                case "modpack":
                case "old modpack":
                    switch (forum)
                    {
                        case Misc.FETCH_TYPE.REL_MODS:
                            type = Misc.DATA_TYPE.REL_MP;
                            break;
                        case Misc.FETCH_TYPE.WIP_MODS:
                            type = Misc.DATA_TYPE.WIP_MP;
                            break;
                        case Misc.FETCH_TYPE.OLD_MODS:
                            type = Misc.DATA_TYPE.OLD_MOD;
                            break;
                        default:
                            Console.WriteLine("INFO: Found a modpack in the wrong place");
                            break;
                    }
                    break;
                case "game":
                case "subgame":
                    switch (forum)
                    {
                        case Misc.FETCH_TYPE.REL_GAMES:
                            type = Misc.DATA_TYPE.REL_GAME;
                            break;
                        case Misc.FETCH_TYPE.WIP_GAMES:
                        case Misc.FETCH_TYPE.WIP_MODS:
                            type = Misc.DATA_TYPE.WIP_GAME;
                            break;
                        /*case Misc.FETCH_TYPE.OLD_GAMES:
                            type = Misc.DATA_TYPE.OLD_GAME; // TODO
                            break;*/
                        default:
                            Console.WriteLine("INFO: Found a subgame in the wrong place");
                            break;
                    }
                    break;
            }
            return type;
        }

        /// <summary>
        /// Counts the '/' characters of <paramref name="url"/>.
        /// </summary>
        /// <param name="url">The URL to inspect.</param>
        /// <param name="cut_pos">Index of the fifth '/', or 0 if there are fewer than five.</param>
        /// <returns>Total number of '/' characters.</returns>
        static int CountSlashes(string url, out int cut_pos)
        {
            // int counters: byte counters wrap around at 255 and would never
            // terminate the loop for URLs of 256 or more characters
            int count = 0;
            cut_pos = 0;
            for (int i = 0; i < url.Length; i++)
            {
                if (url[i] != '/')
                    continue;

                if (count == 4)
                    cut_pos = i;
                count++;
            }
            return count;
        }

        /// <summary>
        /// Analyzes the links in the first post of a topic and picks a
        /// download/source link. Preference: repository link, then direct
        /// download link, then the forum attachment with the highest ID whose
        /// link text contains <paramref name="mod_name"/>.
        /// </summary>
        /// <param name="topicId">ID of the topic to open.</param>
        /// <param name="author">Author name, compared against repository URLs.</param>
        /// <param name="mod_name">Mod name as returned by <c>parseTitle</c>.</param>
        /// <param name="link">Receives the chosen link, or "" if none was found.</param>
        /// <returns><c>true</c> if a repository (source) link was found.</returns>
        bool FetchSingleTopic(int topicId, string author, string mod_name, ref string link)
        {
            Console.WriteLine("=== Topic " + topicId);
            Thread.Sleep(200);
            link = "";

            HtmlNodeCollection bodyNode = OpenPage(
                "https://forum.minetest.net/viewtopic.php?t=" + topicId,
                "//div[@class='content']");
            if (bodyNode == null)
                return false;

            HtmlNodeCollection content = bodyNode[0].SelectNodes(".//a[@class='postlink']");
            if (content == null)
                return false;

            string download = "", source = "";
            int forum_download = 0;

            foreach (HtmlNode dtNode in content)
            {
                string url = dtNode.GetAttributeValue("href", "");
                string text = dtNode.InnerText;

                // An anchor without href has nothing to classify (and would
                // break the trailing slash check below)
                if (url.Length == 0)
                    continue;

                if (url.EndsWith(".git"))
                {
                    source = url;
                    continue;
                }

                if (url[url.Length - 1] == '/')
                    url = url.Remove(url.Length - 1);

                if (url.StartsWith("./download/file.php?id="))
                {
                    // Forum attachment: read the decimal ID behind "id="
                    int pos = 23;
                    int number = 0;
                    while (pos < url.Length)
                    {
                        char cur = url[pos];
                        if (cur < 48 || cur > 57)
                            break;

                        number = number * 10 + (cur - 48);
                        pos++;
                    }

                    string text_lower = text.ToLower().Replace('-', '_');
                    if (text_lower.Contains(mod_name) && number > forum_download)
                        forum_download = number;
                }
                else if (url.Contains(".zip")
                        || url.Contains("/zipball/")
                        || url.Contains("/tarball/")
                        || url.Contains("/archive/")
                        || url.Contains("mediafire.com/"))
                {
                    // Direct download link
                    if (url.Contains("://ompldr.org"))
                        continue;

                    if (url.Contains("git"))
                    {
                        int cut_pos;
                        int count = CountSlashes(url, out cut_pos);

                        if (count == 6)
                        {
                            // Cut at the fifth slash to get the repository URL
                            source = url.Substring(0, cut_pos);

                            // Try to find another link if it's not contained in the name
                            // NOTE(review): the author check additionally requires an
                            // empty 'source' - kept as is, confirm the intention.
                            string src_lower = source.ToLower().Replace('-', '_');
                            if (src_lower.Contains(mod_name)
                                    || (src_lower.Contains(author.ToLower()) && source == ""))
                                break;
                        }
                    }
                    if (download == "")
                        download = url;
                }
                else if (url.Contains("://github.com/")
                        || url.Contains("://notabug.org/")
                        || url.Contains("://bitbucket.org/"))
                {
                    if (url.Contains("/minetest/minetest") || url.Contains("/commits"))
                        continue;

                    int cut_pos;
                    int count = CountSlashes(url, out cut_pos);

                    if (count < 4 || count > 5)
                        continue;

                    // url.EndsWith("/tree") || url.EndsWith("/master")
                    // //github/user/proj/master
                    // If it's too long, cut it off
                    if (count == 5)
                        source = url.Substring(0, cut_pos);
                    else
                        source = url;

                    // NOTE(review): same author condition as in the download branch
                    string src_lower = source.ToLower().Replace('-', '_');
                    if (src_lower.Contains(mod_name)
                            || (src_lower.Contains(author.ToLower()) && source == ""))
                        break;
                }
            }

            if (source == "" && download == "" && forum_download == 0)
                return false;

            link = source != "" ? source : download;
            if (link == "" && forum_download > 0)
                link = "https://forum.minetest.net/download/file.php?id=" + forum_download;

            return source != "";
        }

        // Remove useless tags from the forum titles
        const string MODNAME_ALLOWED_CHARS = "abcdefghijklmnopqrstuvwxyz0123456789_-";
        // Content of [tags]
        string[] bad_content = { "wip", "beta", "test", "code", "indev", "git", "github" };
        // Beginnings of [mod-my_doors5] for wrong formatted titles
        string[] bad_prefix = { "minetest", "mod", "mods" };
        // [tags] to identify a mod
        string[] identifiers = { "mod", "modpack", "game" };

        /// <summary>
        /// Cleans up a forum topic title. Tags in [] or {} are removed when
        /// they are numbers, known noise words, type identifiers or contain
        /// characters outside <c>MODNAME_ALLOWED_CHARS</c>. Any other tag is
        /// taken as the mod name; a leading "minetest"/"mod"/"mods" prefix is
        /// stripped from it and the tag in the title is rewritten accordingly.
        /// Finally, whitespace runs are collapsed and the ends are trimmed.
        /// </summary>
        /// <param name="title">Raw topic title.</param>
        /// <param name="mod_name">Content of the last kept tag, or "" if there is none.</param>
        /// <param name="mod_tag">First type identifier tag found, or "" if there is none.</param>
        /// <returns>The cleaned title.</returns>
        string parseTitle(string title, out string mod_name, out string mod_tag)
        {
            mod_tag = "";
            mod_name = "";

            // 'i' walks the untouched title, 'pos' the same spot in 'raw',
            // which shrinks and grows while tags are removed or rewritten
            string raw = title;
            int pos = 0, open_pos = 0;
            bool opened = false, delete = false;

            for (int i = 0; pos < raw.Length; i++, pos++)
            {
                char cur = title[i];

                if (cur == '[' || cur == '{')
                {
                    opened = true;
                    open_pos = pos;
                    continue;
                }

                // A closing bracket without an opening one is plain text.
                // Without this check a stale open_pos would be used.
                if (opened && (cur == ']' || cur == '}'))
                {
                    // Tag closed
                    opened = false;
                    int len = pos - open_pos + 1;
                    string content = raw.Substring(open_pos + 1, len - 2).ToLower().Trim();
                    double num = 0.0f;
                    bool is_number = double.TryParse(content, out num);

                    if (!is_number && mod_tag == "" && identifiers.IndexOf(content) != -1)
                    {
                        // Mod tag detected
                        mod_tag = content;
                        delete = true;
                    }

                    if (delete || is_number
                            || bad_content.IndexOf(content) != -1
                            || identifiers.IndexOf(content) != -1)
                    {
                        // Remove this tag
                        raw = raw.Remove(open_pos, len);
                        pos -= len;
                        delete = false;
                        continue;
                    }

                    content = content.Replace('-', '_');

                    int start_substr = 0;
                    foreach (string prefix in bad_prefix)
                    {
                        // Too short for this prefix: try the next one. A 'break'
                        // here would let the long "minetest" entry hide "mod".
                        if (content.Length <= start_substr + prefix.Length + 1)
                            continue;

                        if (content.Substring(start_substr, prefix.Length) == prefix)
                            start_substr += prefix.Length;

                        if (content[start_substr] == '_')
                            start_substr++;
                    }

                    if (start_substr == 0)
                    {
                        // Everything fine, nothing to replace
                        mod_name = content;
                    }
                    else
                    {
                        // Replace this tag with the proper name
                        mod_name = content.Substring(start_substr);
                        raw = raw.Remove(open_pos, len);
                        pos -= len;

                        // Strings are immutable: the result must be assigned.
                        // The tag goes back to where the old one started.
                        string to_insert = "[" + mod_name + "]";
                        raw = raw.Insert(open_pos, to_insert);
                        // Now on the last inserted character, the loop increment
                        // moves on to the first unprocessed one
                        pos += to_insert.Length;
                    }
                    delete = false;
                }

                if (opened && MODNAME_ALLOWED_CHARS.IndexOf(cur) == -1)
                {
                    // Not a valid mod name, drop the tag once it is closed
                    delete = true;
                }
            }

            // Trim double whitespaces
            System.Text.StringBuilder cleaned = new System.Text.StringBuilder(raw.Length);
            bool last_was_space = true; // Drops leading whitespace, too
            foreach (char cur in raw)
            {
                bool is_space = char.IsWhiteSpace(cur);
                if (is_space && last_was_space)
                    continue;

                last_was_space = is_space;
                cleaned.Append(cur);
            }

            // At most one trailing whitespace can be left over
            if (cleaned.Length > 0 && char.IsWhiteSpace(cleaned[cleaned.Length - 1]))
                cleaned.Length--;

            return cleaned.ToString();
        }
    }
}