-
Notifications
You must be signed in to change notification settings - Fork 4
/
Copy pathKissanimeFrontPageScraper.py
62 lines (46 loc) · 1.66 KB
/
KissanimeFrontPageScraper.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
# Importing important libraries
import csv

import cfscrape
from bs4 import BeautifulSoup as soup
# Scrapes the Kissanime front page and stores, for every listed item, its
# title, video link, thumbnail link and genre in 'KissanimeFrontPage.csv'.
# Each item is also echoed to stdout while it is processed.

# Bypassing Cloudflare DDoS protection using the 3rd-party module 'cfscrape'
scraper = cfscrape.create_scraper()
pageHTML = scraper.get('https://kissanime.ac/kissanime.html').content
# Parsing the downloaded HTML source code
pageSoup = soup(pageHTML, 'html.parser')
# Finding every listing container in the parsed page
containers = pageSoup.findAll('div', {'class': 'item_film_list'})
# Name of the csv file to write
fileName = 'KissanimeFrontPage.csv'
# Header line of the csv file (kept exactly as before so that existing
# consumers of the file see the same column names)
headers = 'Title, Link, ThumbnailLink, Genre\n'
# Using utf8 encoding due to compatibility issue in Windows.
# The 'with' block guarantees the file is closed even if parsing one of the
# containers raises; newline='' hands line-ending control to the csv writer.
with open(fileName, 'w', encoding='utf8', newline='') as f:
    # csv.writer quotes any field containing a comma, quote or newline, so a
    # title such as 'Foo, Bar' no longer shifts the columns of its row.
    writer = csv.writer(f, lineterminator='\n')
    # Writing headers
    f.write(headers)
    # Running loop for each container
    for container in containers:
        # Video link
        vidLink = container.a['href']
        # Thumbnail source: only the first matching <img> is used, so only
        # that tag is converted from bs4.element.Tag to str.
        thumbContainer = container.findAll('img', {'class': 'thumb'})
        thumbMarkup = str(thumbContainer[0])
        # The URL sits in the fifth ';'-separated piece of the tag markup,
        # after '=//' and up to the next double quote.
        thumbUrl = thumbMarkup.split(';')[4].split('=//')[1].split('"')[0]
        # Title of the video
        title = container.h3.span.text
        # Genre: third line of the container's first <p> text
        genres = container.p.text.split('\n')[2]
        print('title: ' + title)
        print('vidLink: ' + vidLink)
        print('thumbUrl: ' + thumbUrl)
        print('genres: ' + genres)
        print('\n\n')
        # Writing extracted values in csv file; commas inside the genre list
        # are still turned into '|' to keep the previous file format.
        writer.writerow([title, vidLink, thumbUrl, genres.replace(',', '|')])