-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathfinal.php
82 lines (79 loc) · 1.63 KB
/
final.php
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
<!DOCTYPE html>
<html>
<head>
<title>Web Crawler in PHP</title>
</head>
<body>
<div id="content" style="margin-top:10px;height:100%;">
<center><h1>Web Crawler in PHP</h1></center>
<!-- Crawl form: posts to final.php; the PHP handler further down this page reads the "url" and "submit" fields from $_POST. -->
<form action="final.php" method="POST">
URL : <input name="url" size="35" />
<input type="submit" name="submit" value="Start Crawling"/>
</form>
<?php
// Full open tag: the short "<?" form is only parsed when short_open_tag is
// enabled; with it off, the PHP source below would be sent to the browser.
// The DOM parser supplies file_get_html(), which every crawl depends on, so
// load it with require_once and stop immediately if the file is missing.
require_once "simple_html_dom.php";
/**
 * Crawl one page and print every list item found inside its ".post-content" blocks.
 *
 * The text of each <li> is echoed as a centered heading and then passed to
 * image(), which prints a picture for it.
 *
 * @param string $u Absolute http(s) URL of the page to crawl. The value comes
 *                  from the submitted form, so it is treated as untrusted.
 * @return void Everything is echoed directly into the page.
 */
function crawl_site($u)
{
    // file_get_html() would also open local paths and other stream wrappers
    // (e.g. "/etc/passwd", "php://..."), so only http and https are accepted.
    $target = is_string($u) ? trim($u) : '';
    $scheme = strtolower((string) parse_url($target, PHP_URL_SCHEME));
    if ($scheme !== 'http' && $scheme !== 'https') {
        echo "<h2>A valid URL please.</h2>";
        return;
    }

    $html = file_get_html($target);
    if (!$html) {
        // A false result means the page could not be loaded; calling find()
        // on it would abort the whole request with a fatal error.
        echo "<h2>Could not load the requested page.</h2>";
        return;
    }

    foreach ($html->find('.post-content') as $article) {
        foreach ($article->find('li') as $li) {
            $es = $li->plaintext;
            echo "<h1><center>";
            // Scraped text is untrusted. Escape it for HTML, but leave entities
            // that are already encoded in the scraped markup as they are.
            echo htmlspecialchars((string) $es, ENT_QUOTES, 'UTF-8', false);
            echo "</center></h1>";
            echo '<br>';
            image($es);
        }
    }
}
/**
 * Print the first image found on the Google Images result page for a text.
 *
 * @param string $est Search text; it is normalised with seoUrl() before being
 *                    appended to the query string.
 * @return void A line break and, when an image is found, a centered <img> tag
 *              are echoed directly.
 */
function image($est)
{
    $urli = "https://www.google.com/search?tbm=isch&q=" . seoUrl($est);
    $htm = file_get_html($urli);
    echo '<br>';
    if (!$htm) {
        // The search page could not be loaded: show nothing for this item
        // instead of failing fatally on find().
        return;
    }

    // Only the first <img> is ever displayed, so ask the parser for index 0
    // instead of walking every image on the page.
    $first = $htm->find('img', 0);
    if ($first) {
        // The src comes from a remote page and is placed inside a
        // single-quoted attribute, so quotes must be escaped. Entities that
        // are already encoded in the scraped value are left untouched.
        $urld = htmlspecialchars((string) $first->src, ENT_QUOTES, 'UTF-8', false);
        echo "<center><br><img src='" . $urld . "'><br></center>";
    }

    // image() runs once per list item; release the parsed DOM so memory does
    // not pile up over a long crawl.
    $htm->clear();
    unset($htm);
}
/**
 * Reduce free text to a lowercase, "+"-separated term for use in a query string.
 *
 * Letters are lowercased; everything except a-z, 0-9, underscore, whitespace
 * and hyphen is dropped; runs of whitespace/hyphens collapse to one separator;
 * separators and underscores become "+".
 *
 * @param string $string Text to convert.
 * @return string Converted text containing only a-z, 0-9 and "+".
 */
function seoUrl($string)
{
    // The whitelist below only knows lowercase letters, so fold case first;
    // otherwise every capital letter is silently deleted ("Hello" -> "ello").
    $string = strtolower((string) $string);
    $string = preg_replace("/[^a-z0-9_\s-]/", "", $string);
    $string = preg_replace("/[\s-]+/", " ", $string);
    // Drop edge whitespace so the result has no stray leading/trailing "+".
    $string = trim($string);
    $string = preg_replace("/[\s_]/", "+", $string);
    return $string;
}
// Handle the form post: crawl the submitted URL, or ask for one if it is blank.
if (isset($_POST['submit'])) {
    // A hand-crafted request can omit "url" or send it as an array; treat both
    // as empty instead of raising a notice or passing a non-string along.
    $url = (isset($_POST['url']) && is_string($_POST['url'])) ? trim($_POST['url']) : '';
    if ($url === '') {
        echo "<h2>A valid URL please.</h2>";
    } else {
        crawl_site($url);
    }
}
?>
</div>
<style>
/* Applies to every input element on the page (the URL box and the submit button): no border, 8px padding. */
input
{
border:none;
padding:8px;
}
</style>
</body>
</html>