various/crawler/index.php

296 lines
9.3 KiB
PHP
Executable File

<?php
// INCLUDE
include_once("crawl.php");
include_once("config.php");
// END
?>
<!DOCTYPE html>
<html lang="en-US">
<head>
<title>Seed</title>
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
<meta name="viewport" content="width=device-width, initial-scale=1" />
<meta name="description" content="Open Source Search Engine, foss database, popular sites, media, and more." />
<style type="text/css">
@import url('https://fonts.googleapis.com/css?family=Ubuntu+Mono&display=swap');
@import url('https://fonts.googleapis.com/css2?family=Monoton&display=swap');
@media only screen and (min-width: 2001px) { body { max-width: 750px; } }
@media only screen and (max-width: 2000px) { body { max-width: 750px; } }
@media only screen and (max-width: 1400px) { body { max-width: 750px; } }
@media only screen and (max-width: 1200px) { body { max-width: 700px; } }
@media only screen and (max-width: 1000px) { body { max-width: 700px; } }
@media only screen and (max-width: 800px) { body { max-width: 700px; } }
/* @media only screen and (max-width: 700px) { body { max-width: 600px; } }
@media only screen and (max-width: 600px) { body { max-width: 500px; } }
@media only screen and (max-width: 500px) { body { max-width: 450px; } }
@media only screen and (max-width: 400px) { body { max-width: 350px; } } */
html {
font-family: "Ubuntu Mono", sans-serif;
background-color: #222222;
color: #ddddff;
padding: 4px;
margin: 0 auto;
font-size: 16px;
min-width: 700px;
}
body { margin: 0 auto; }
a { color: #9900ff; }
a:hover { color: #dd00ff; text-decoration: none; }
table { width: 100%; }
p { padding: 5px; }
.menu {
font-size: 20px;
background: #131313;
padding: 3px 3px 3px 3px;
border: solid 1px #444444;
width: 95%;
margin: auto;
border-radius: 4px;
box-shadow: 0px 0px 10px #000000;
}
.menu a {
color: silver;
text-decoration: none;
padding: 3px 10px 3px 10px;
/* some effects */
transition: ease-in .4s color;
-moz-transition: ease-in .4s color;
-webkit-transition: ease-in .4s color;
-o-transition: ease-in .4s color;
}
.menu a:hover {
color: white;
text-decoration: none;
}
.header {
font-family: "Monoton", sans-serif;
font-size: 28px;
width: 40px;
color: #aa00ff;
}
#quote {
max-width: 90%;
background-color: #000000;
border: solid 1px #101010;
border-radius: 4px;
font-size: 16px;
padding: 4px;
margin: 0 auto;
}
#searchbar {
font-size: 20px;
background-color: #131313;
border: solid 1px #434343;
color: #ececec;
padding: 4px;
width: 540px;
border-radius: 4px;
}
input, button {
font-size: 20px;
background-color: #131313;
border: solid 1px #434343;
color: #ececec;
padding: 4px;
border-radius: 4px;
}
.container { padding: 12px; }
.footer { font-size: 16px; color: #565656; text-align: center; }
.footer a { text-decoration: none; }
.note { color: red; font-weight: 500; padding: 7px; }
.rfloat { float: right; display: inline-block; }
</style>
</head>
<body>
<table><tr><td>
<div class="header">SEED</div>
</td><td>
<div class="menu"><!--
--><a href="index.php">Search</a><!--
--><a href="index.php?about">About</a><!--
--><a href="index.php?submit">Submit</a><!--
--></div>
</td></tr></table>
<div class="container">
<p>
<?php
// Simple search engine paired with the crawler!
// (C) Chris Dorman, 2021 CC-BY-SA 3.0
// Running count of search results printed for this request: the search
// branch below increments it per result and echoes it in "Found N results".
$searchcount = "0";
/**
 * Split a search query into the keywords worth searching for individually.
 *
 * The query is split on any run of whitespace, common stop words are dropped
 * (compared case-insensitively), and the result is capped at
 * $GLOBALS['max_word_search_array'] keywords. When that setting is missing the
 * list is not capped.
 *
 * @param string $query Raw query text; may contain repeated or surrounding whitespace.
 * @return array List of keywords in their original order and spelling. Empty when
 *               the query is blank or consists only of stop words.
 */
function filterSearchKeys($query){
    // Stop words are stored lower-case so that "The" and "the" are both filtered.
    // expand this list with your words.
    $stopwords = array("in","it","a","the","of","or","i","you","he","me","us","they","she","to","but","that","this","those","then");

    // Fall back to "no limit" instead of comparing against an undefined key.
    $limit = isset($GLOBALS['max_word_search_array'])
        ? (int) $GLOBALS['max_word_search_array']
        : PHP_INT_MAX;

    // PREG_SPLIT_NO_EMPTY guarantees no empty keyword is produced; an empty
    // needle would otherwise match every record in a later stripos() call.
    $tokens = preg_split('/\s+/', trim((string) $query), -1, PREG_SPLIT_NO_EMPTY);
    if ($tokens === false) {
        return array();
    }

    $words = array();
    foreach ($tokens as $key) {
        // Check the cap before appending so at most $limit keywords are returned.
        if (count($words) >= $limit) {
            break;
        }
        if (in_array(strtolower($key), $stopwords, true)) {
            continue;
        }
        $words[] = $key;
    }
    return $words;
}
/**
 * Escape a value for output inside HTML text or a quoted HTML attribute.
 *
 * Entities already present in the value are left untouched (double_encode is
 * false). NOTE(review): the database records are written by crawl.php, which is
 * not visible here; if they are stored entity-encoded this avoids "&amp;amp;".
 *
 * @param mixed $value Value to print; cast to string.
 * @return string HTML-safe text.
 */
function seedEscape($value)
{
    return htmlspecialchars((string) $value, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8', false);
}

/**
 * Pull the URL, title and description out of one database record.
 *
 * A record is the text that follows a "URL: " marker: the URL is its first
 * line, the title sits between "Title: " and "Description: ", and the
 * description between "Description: " and "Keywords: ". Missing fields come
 * back as empty strings instead of raising undefined-index errors.
 *
 * @param string $record Record text without the leading "URL: " marker.
 * @return array Map with the keys 'url', 'title' and 'desc'.
 */
function seedParseRecord($record)
{
    $lines = explode("\n", $record);

    $title = '';
    $afterTitle = explode("Title: ", $record);
    if (isset($afterTitle[1])) {
        $titleParts = explode("Description: ", $afterTitle[1]);
        $title = $titleParts[0];
    }

    $desc = '';
    $afterDesc = explode("Description: ", $record);
    if (isset($afterDesc[1])) {
        $descParts = explode("Keywords: ", $afterDesc[1]);
        $desc = $descParts[0];
    }

    return array(
        'url' => trim($lines[0]),
        'title' => trim($title),
        'desc' => trim($desc),
    );
}

/**
 * Print one search hit (link, green URL line and description).
 *
 * Every field is escaped because it originates from crawled, untrusted pages.
 *
 * @param string $record       Record text without the leading "URL: " marker.
 * @param string $descfontsize CSS font-size used for the description paragraph.
 * @return void
 */
function seedRenderResult($record, $descfontsize)
{
    $fields = seedParseRecord($record);
    $url = seedEscape($fields['url']);
    // Fall back to the URL so a record without a title still yields a visible link.
    $title = $fields['title'] !== '' ? seedEscape($fields['title']) : $url;

    echo "<a href='" . $url . "'>" . $title . "</a><br />\n";
    echo "<p style='font-size: $descfontsize;'>"
        . "<span style='color:#00BB00;padding-bottom:4px;'>" . $url . "</span><br />"
        . seedEscape($fields['desc']) . "</p><br />\n";
}

/**
 * Print the search form shown on the landing page and above the results.
 *
 * @return void
 */
function seedRenderSearchForm()
{
?>
<form action="index.php" method="get">
<span style="font-size: 20px; padding-right: 8px; ">Search: </span><input type="text" name="search" id="searchbar"><input type="submit" value="Go">
</form>
<?php
}

// Only accept scalar string parameters; "?search[]=x" would otherwise hand an
// array to htmlentities().
$searchinput = (isset($_GET['search']) && is_string($_GET['search'])) ? $_GET['search'] : '';
$crawlinput = (isset($_GET['crawl']) && is_string($_GET['crawl'])) ? $_GET['crawl'] : '';

if ($searchinput !== '')
{
    // The needle is computed exactly as before so matching against the stored
    // records is unchanged. NOTE(review): this assumes the database text is
    // entity-encoded the same way; confirm against crawl.php.
    $searchquery = stripslashes(htmlentities($searchinput));

    // Treat an unreadable database as empty instead of exploding a false value.
    $searchdb = is_readable($database) ? file_get_contents($database) : false;
    $searchbuffer = explode("URL: ", $searchdb === false ? '' : $searchdb);
    // Drop whatever precedes the first "URL: " marker.
    array_shift($searchbuffer);

    seedRenderSearchForm();
    echo "<br /><br />\n";

    $maxresults = (int) $maxfinds;
    // Indexes of records already printed, so the fallback pass never repeats them.
    $shown = array();

    // Pass 1: records containing the whole query as one phrase.
    foreach ($searchbuffer as $index => $site) {
        // If there are too many finds, don't flood the page.
        if ($searchcount >= $maxresults) {
            break;
        }
        if ($searchquery !== '' && stripos($site, $searchquery) !== false) {
            seedRenderResult($site, $descfontsize);
            $shown[$index] = true;
            $searchcount++;
        }
    }

    // Pass 2: few phrase matches, so fall back to a word-by-word search. A
    // record qualifies when it contains the last keyword and at least one of
    // the other keywords (single-keyword queries add nothing over pass 1).
    if ($searchcount < 25) {
        $searchkeys = filterSearchKeys($searchquery);
        $keycount = count($searchkeys);
        if ($keycount > 1) {
            $lastkey = $searchkeys[$keycount - 1];
            $otherkeys = array_slice($searchkeys, 0, -1);
            foreach ($searchbuffer as $index => $site) {
                if ($searchcount >= $maxresults) {
                    break;
                }
                if (isset($shown[$index])) {
                    continue;
                }
                if ($lastkey === '' || stripos($site, $lastkey) === false) {
                    continue;
                }
                foreach ($otherkeys as $key) {
                    if ($key !== '' && stripos($site, $key) !== false) {
                        seedRenderResult($site, $descfontsize);
                        $shown[$index] = true;
                        $searchcount++;
                        // One matching keyword is enough; print the record once.
                        break;
                    }
                }
            }
        }
    }

    echo "<p>Found $searchcount results</p>";
    echo "<br />\n";
}
else if ($crawlinput !== '')
{
    // Validate and crawl the URL exactly as submitted; entity-encoding it first
    // would turn "&" in query strings into "&amp;" and crawl the wrong address.
    $crawlurl = trim($crawlinput);
    $scheme = strtolower((string) parse_url($crawlurl, PHP_URL_SCHEME));
    // FILTER_VALIDATE_URL also accepts non-web schemes, so restrict the crawler
    // to http and https.
    // NOTE(review): internal addresses (e.g. http://127.0.0.1/) are still
    // accepted; consider a host allow/deny list if this is publicly reachable.
    $isweburl = in_array($scheme, array('http', 'https'), true);

    if ($isweburl && filter_var($crawlurl, FILTER_VALIDATE_URL)) {
        // Escape only for display; the crawler receives the raw URL.
        $safeurl = seedEscape($crawlurl);
        echo "Crawling $safeurl, please wait this can take a few!<br />\n";
        $crawl = crawl_page($crawlurl, $crawl_depth, $database);
        // crawl_page() lives in crawl.php; a return value of 3 suppresses the
        // confirmation message. NOTE(review): presumably a "not added" status; confirm.
        if ($crawl != 3) {
            echo "$safeurl added to database!";
        }
    } else {
        echo "ERROR: Invalid URL format!";
    }
}
else if(isset($_GET['about']))
{
    echo "<h3>About seed</h3>\n";
    echo "<p>Seed is the most simple yet most secure, non-tracking, open source search engine primarily around the free and open source community!"
        . " Building a search engine database requires resources, and lots of crawling! Help us out in providing us with some helpful links used to"
        . " add content to our database!</p>";
}
else if(isset($_GET['submit']))
{
    echo "<h3>Submit a URL into the seed</h3>\n";
    echo "By submitting a URL into seed's web crawler prompt, you acknowledge that you're authorizing us to crawl the given link!<br />";
    echo "<form action='index.php' method='get'>";
    echo "URL: <input type='text' name='crawl'><input type='submit' value='Crawl'>";
    echo "</form>";
}
else
{
    // No recognised action: show the plain search form.
    seedRenderSearchForm();
}
?>
<br />
<br />
<div id="quote">
<?php
// Print one randomly chosen quote from quotes/1.txt .. quotes/80.txt.
$quotefile = "quotes/" . rand(1,80) . ".txt";
// A missing or unreadable quote file leaves the box empty instead of printing
// a PHP warning into the page and passing false to htmlentities().
$quote = is_readable($quotefile) ? file_get_contents($quotefile) : false;
if ($quote !== false) {
    // Escape the text, then turn its line breaks into <br /> tags.
    echo nl2br(stripslashes(htmlentities($quote)));
}
?>
</div>
</p>
</div>
<div class="footer">
&copy; Seed, 2021
</div>
</body>
</html>