twittermain.py~
# Crawl Twitter's "related users" suggestions, starting from one seed
# profile, and log every profile that gets visited.
from simplejson import loads
from collections import OrderedDict
from bs4 import BeautifulSoup as bs
import threading
import urllib2
import requests
import MySQLdb
import time

# Optional MySQL persistence (currently disabled).
#db = MySQLdb.connect('localhost','root','#brainse','twitter')
#cursor = db.cursor()
#insert_sql="insert into profiles (user_id,user_name,user_url) values(%s,%s,%s)"

# Install a urllib2 opener with an empty proxy map and a browser-like
# User-agent so profile pages can be fetched directly.
proxy=urllib2.ProxyHandler({})
opener=urllib2.build_opener(proxy)
opener.addheaders=[('User-agent','Mozilla/5.0')]
urllib2.install_opener(opener)

dic=OrderedDict()   # user_id -> profile URL for every user seen so far
main_list=[]        # flat list of newly discovered user ids
start="737848638"   # seed user id (kept for reference, unused below)
filetracker=open("/home/sys8/twitter/twitter_track.txt","a+")
temptracker=open("/home/sys8/twitter/temp_tracker.txt","a+")
dic["737848638"]="https://twitter.com/legendstevejobs"   # seed profile; string key to match the scraped ids
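
# The crawl works breadth-first: urlmaker() asks Twitter's related-users
# endpoint for profiles similar to a given user, pagedownloader() logs each
# visited profile to the tracker files, and every newly seen user id is
# appended to dic so the driver loop at the bottom can visit it in turn.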
def id_Checker(user_id):
    # Return True only if this user id has not been recorded in dic yet
    # (str() so the seed key and the scraped ids compare consistently).
    global dic
    if str(user_id) in dic:
        return False
    else:
        return True
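
# pagedownloader() fetches the profile page and appends one record per
# visited profile to the tracker files, one line per profile in the form
#   <user_id>##<profile_url>##<screen_name>
# The per-profile dump to disk and the MySQL insert remain commented out.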
def pagedownloader(user_id,user_url):
    url=urllib2.urlopen(user_url).read()   # profile HTML (only consumed by the disabled code below)
    user_name=user_url.split('/')[3]
    #filename="%s_db"%user_name
    #fileopen=open("/home/akhil/Desktop/twitter/db/"+str(filename),"a")
    #fileopen.write(url)
    #record=[MySQLdb.escape_string(str(user_id)),MySQLdb.escape_string(str(user_name)),MySQLdb.escape_string(str(user_url))]
    #print record
    #cursor.execute(insert_sql,record)
    #db.commit()
    filetracker.write(str(user_id)+"##"+str(user_url)+"##"+str(user_name)+"\n")
    temptracker.write(str(user_url)+"\n")
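
# urlmaker() queries https://twitter.com/i/related_users/<user_id>, which is
# expected to return JSON whose 'related_users_html' field holds HTML
# snippets; each "content" div contains an anchor carrying the suggested
# user's data-user-id and profile href.  Users not seen before are queued
# in dic (and main_list) for the driver loop to visit later.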
def urlmaker(user_id,user_url):
    try:
        global dic
        global main_list
        r=requests.get("https://twitter.com/i/related_users/"+str(user_id))
        if r.status_code==requests.codes.ok:
            pagedownloader(user_id,user_url)
            print user_url
            data=loads(r.content)
            data=data.get('related_users_html')
            soup=bs(data,"html.parser")
            div=soup.findAll("div",{"class":"content"})
            for i in range(len(div)):
                anchor=div[i].find("a")
                user_id=anchor.get("data-user-id")
                if id_Checker(user_id):
                    main_list.append(user_id)
                    dic[user_id]=str("https://twitter.com"+anchor.get("href"))
    except Exception as exp:
        print "******************EXCEPTION********************************"
        print exp
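
# Driver: seed the crawl from the @legendstevejobs profile, then walk every
# profile recorded in dic with at most three worker threads, throttling the
# requests as the queue grows.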
urlmaker(737848638,"https://twitter.com/legendstevejobs")
index=1
while True:
    try:
        if index%100==0:
            # pause for five minutes after every 100 queued profiles
            print "$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$"
            print "                              SLEEPING                            "
            print "$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$"
            time.sleep(300)
        if index<len(dic) and threading.activeCount()<3:
            print "count is :: "+str(index)
            threading.Thread(target=urlmaker,args=(dic.keys()[index],dic.values()[index],)).start()
            index=index+1
        else:
            time.sleep(1)   # wait for a free worker slot or for new profiles
    except Exception as exp:
        print "@@@@@@@@@@@@@@@@@@@@@@@EXCEPTION@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@"
        print exp