-
Notifications
You must be signed in to change notification settings - Fork 137
/
Copy pathchannelscraper.py
92 lines (73 loc) · 3.31 KB
/
channelscraper.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
import os
import asyncio
import details as ds
from telethon.sync import TelegramClient, types
import pandas as pd
from colorama import Fore, Style
# Telegram credentials, read from the project-local ``details`` module.
phone = ds.number  # handed to TelegramClient as its first argument
api_hash = ds.apiHash
api_id = ds.apiID
def _link_handle(channel_name):
    """Return the bare handle to use when building ``https://t.me/...`` links.

    Accepts a plain handle, an ``@handle`` or a ``t.me/handle`` URL; a plain
    handle is returned unchanged.
    """
    handle = str(channel_name).strip().rstrip("/")
    _, separator, tail = handle.partition("t.me/")
    if separator:
        handle = tail
    return handle.lstrip("@")


def _sender_fields(sender):
    """Return ``(username, first_name, last_name, user_id)`` for a sender.

    Only ``types.User`` senders carry these fields; a missing sender or any
    other sender type yields ``"N/A"`` in every position.
    """
    if not sender or not isinstance(sender, types.User):
        return "N/A", "N/A", "N/A", "N/A"
    return (
        sender.username or "N/A",
        sender.first_name or "N/A",
        sender.last_name or "N/A",
        sender.id,
    )


async def scrape_channel_content(channel_name):
    """Collect every message the client can iterate in ``channel_name``.

    Args:
        channel_name: Channel identifier passed as-is to
            ``client.get_entity`` (handle, ``@handle`` or ``t.me`` URL).

    Returns:
        A list of tuples ``(text, username, first_name, last_name, user_id,
        views, message_url)``, one per message. On any exception the error
        is printed and an empty list is returned.
    """
    # Permalinks need the bare handle even when the caller passed a full URL.
    link_handle = _link_handle(channel_name)

    async with TelegramClient(phone, api_id, api_hash) as client:
        try:
            entity = await client.get_entity(channel_name)
            content = []
            post_count = 0

            async for post in client.iter_messages(entity):
                post_count += 1

                text = post.text or ""
                username, first_name, last_name, user_id = _sender_fields(post.sender)

                # Test against None rather than truthiness: a post with
                # 0 views is real data and must not be reported as "N/A".
                views = "N/A" if post.views is None else post.views
                message_url = f"https://t.me/{link_handle}/{post.id}"

                content.append((text, username, first_name, last_name, user_id, views, message_url))

                # Progress report every tenth post.
                if post_count % 10 == 0:
                    print(
                        f"{Fore.WHITE}{post_count} Posts scraped in {Fore.LIGHTYELLOW_EX}{channel_name}{Style.RESET_ALL}")

            return content

        except Exception as e:
            # Top-level boundary for the scrape: report and signal "nothing".
            print(f"An error occurred: {Fore.RED}{e}{Style.RESET_ALL}")
            return []
def _storage_slug(channel_name):
    """Reduce the user's channel input to a single safe path component.

    Strips a leading ``t.me/`` URL prefix and ``@``, then replaces every
    character that is not alphanumeric, ``-`` or ``_`` with ``_``. A plain
    handle made only of those characters is returned unchanged.
    """
    handle = channel_name.strip().rstrip("/")
    _, separator, tail = handle.partition("t.me/")
    if separator:
        handle = tail
    handle = handle.lstrip("@")
    slug = "".join(ch if ch.isalnum() or ch in "-_" else "_" for ch in handle)
    return slug or "channel"


async def main():
    """Prompt for a channel, scrape it and save the messages as a CSV file.

    The CSV is written to ``Collection/<channel>/<channel>_messages.csv``,
    where ``<channel>`` is a filesystem-safe form of the entered name.
    Any exception is reported on stdout instead of being raised.
    """
    try:
        channel_name = input(
            f"{Fore.CYAN}Please enter a target Telegram channel (e.g., https://t.me/{Fore.LIGHTYELLOW_EX}your_channel{Style.RESET_ALL}):\n").strip()
        print(f'You entered "{Fore.LIGHTYELLOW_EX}{channel_name}{Style.RESET_ALL}"')
        answer = input('Is this correct? (y/n)')
        # Tolerate "Y", "yes" and stray whitespace in the confirmation.
        if answer.strip().lower() not in ('y', 'yes'):
            return

        # Paths use a sanitized name: a raw URL such as "https://t.me/x"
        # would otherwise expand into nested, never-created directories and
        # make the final CSV write fail after the whole scrape has run.
        slug = _storage_slug(channel_name)
        output_directory = f"Collection/{slug}"
        os.makedirs(output_directory, exist_ok=True)
        csv_filename = f'{output_directory}/{slug}_messages.csv'

        print(f'Scraping content from {Fore.LIGHTYELLOW_EX}{channel_name}{Style.RESET_ALL}...')
        # The scraper receives the input as typed (only trimmed).
        content = await scrape_channel_content(channel_name)

        if not content:
            print(f'{Fore.RED}No content scraped.{Style.RESET_ALL}')
            return

        df = pd.DataFrame(content, columns=['Text', 'Username', 'First Name', 'Last Name', 'User ID', 'Views',
                                            'Message URL'])
        try:
            df.to_csv(csv_filename, index=False)
            print(
                f'Successfully scraped and saved content to {Fore.LIGHTYELLOW_EX}{csv_filename}{Style.RESET_ALL}.')
        except Exception as e:
            print(f"An error occurred while saving to CSV: {Fore.RED}{e}{Style.RESET_ALL}")
    except Exception as e:
        # Top-level boundary of the interactive script: report, don't crash.
        print(f"An error occurred: {Fore.RED}{e}{Style.RESET_ALL}")
# Start the interactive scraper only when run as a script, not on import.
if __name__ == '__main__':
    scraper_run = main()
    asyncio.run(scraper_run)