-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathscrape-1.go
66 lines (57 loc) · 1.79 KB
/
scrape-1.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
package main
import (
"encoding/csv"
"fmt"
"log"
"os"
"github.com/gocolly/colly"
)
// main scrapes the IMDb list at fetchURL and stores one CSV record per movie
// in fileName, then reports completion on stdout.
//
// Any failure (creating the file, fetching the page, writing or closing the
// CSV output) is logged and terminates the process with a non-zero status.
func main() {
	fetchURL := "https://www.imdb.com/list/ls033609554/"
	fileName := "disney-movies.csv"

	file, err := os.Create(fileName)
	if err != nil {
		// Fatalf, not Fatal: the message contains formatting verbs.
		log.Fatalf("ERROR: Could not create file %q: %s\n", fileName, err)
	}

	// The scrape runs in a helper so the file is closed before the process
	// can exit: log.Fatalf calls os.Exit, which skips deferred calls.
	scrapeErr := scrapeMovies(fetchURL, csv.NewWriter(file))
	closeErr := file.Close()
	if scrapeErr != nil {
		log.Fatalf("ERROR: Could not scrape %q: %s\n", fetchURL, scrapeErr)
	}
	if closeErr != nil {
		log.Fatalf("ERROR: Could not close file %q: %s\n", fileName, closeErr)
	}

	fmt.Println("End of scraping: ", fetchURL)
}

// scrapeMovies visits fetchURL and writes a header row followed by one record
// per ".lister-item-content" element to writer. The writer is flushed before
// returning, even when the visit fails, so rows collected so far are kept.
//
// It returns the first error from the header write, the page visit, or the
// buffered CSV output.
func scrapeMovies(fetchURL string, writer *csv.Writer) error {
	// Column headers; every record below must follow this exact order.
	header := []string{
		"Sl. No.", "Movie Name", "Release Year", "Certificate", "Genre",
		"Running time", "Rating", "Number of Votes", "Gross",
	}
	if err := writer.Write(header); err != nil {
		return fmt.Errorf("writing CSV header: %w", err)
	}

	// Instantiate the default Collector.
	c := colly.NewCollector()

	// Before making a request, print "Visiting: ...".
	c.OnRequest(func(r *colly.Request) {
		fmt.Println("Visiting: ", r.URL)
	})

	// Called for each DOM segment that holds one movie's information.
	c.OnHTML(`.lister-item-content`, func(e *colly.HTMLElement) {
		// Locate and extract the different pieces of information about the movie.
		number := e.ChildText(".lister-item-index")
		name := e.ChildText(".lister-item-index ~ a")
		year := e.ChildText(".lister-item-year")
		runtime := e.ChildText(".runtime")
		certificate := e.ChildText(".certificate")
		genre := e.ChildText(".genre")
		rating := e.ChildText("[class='ipl-rating-star small'] .ipl-rating-star__rating")
		vote := e.ChildAttr("span[name=nv]", "data-value")
		gross := e.ChildText(".text-muted:contains('Gross') ~ span[name=nv]")

		// Fields are listed in header order: genre comes before running time.
		record := []string{
			number,
			name,
			year,
			certificate,
			genre,
			runtime,
			rating,
			vote,
			gross,
		}
		if err := writer.Write(record); err != nil {
			// Logged here for context; writer.Error() below also reports
			// the failure to the caller after the flush.
			log.Printf("ERROR: Could not write record for %q: %s", name, err)
		}
	})

	// Start scraping the page under the given URL.
	visitErr := c.Visit(fetchURL)

	// Flush regardless of the visit outcome so buffered rows reach the file.
	writer.Flush()
	if visitErr != nil {
		return fmt.Errorf("visiting %q: %w", fetchURL, visitErr)
	}
	if err := writer.Error(); err != nil {
		return fmt.Errorf("writing CSV output: %w", err)
	}
	return nil
}