getProblems.py
import json
import time
import httpx
import asyncio
from bs4 import BeautifulSoup

from logger import getLogger
from customTypes import Function
from helper import extractProblemsFromResponse, makeBulkRequests

logger = getLogger(__name__)


async def getAllProblemUrls(req: Function, ses: httpx.AsyncClient):
    """
    Gets all problem urls from toph.co/problems/all

    Args:
        req (Function): httpx.AsyncClient.get
        ses (httpx.AsyncClient): httpx.AsyncClient

    Returns:
        list[str]: List of problem urls
    """
    logger.info("Getting all problem urls from /p/all")
    rootUrl = "http://toph.co/problems/all"
    # Build the paginated listing URLs (offset step of 25) and fetch them concurrently.
    responses = await makeBulkRequests(
        [rootUrl + f"?start={i*25}&sort=title" for i in range(1, 79)], req, ses
    )

    logger.info("Extracting problem urls from responses")
    allProblems = []
    for resp in responses:
        if isinstance(resp, Exception):
            # Failed requests come back as Exception objects; log and skip them.
            logger.error(f"Error while extracting problem urls: {resp}")
            continue
        allProblems.extend(extractProblemsFromResponse(resp))

    # Extracted hrefs are relative; prefix the site root to get absolute URLs.
    allProblems = ["http://toph.co" + i for i in allProblems]
    with open("Data/allProblems.txt", "w") as f:
        f.write("\n".join(allProblems))
    logger.info(f"Got {len(allProblems)} problem urls and saved to file")
    return allProblems
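

# ---------------------------------------------------------------------------
# Illustrative sketch (not part of the original repository): the helpers
# imported from `helper` are not shown in this file. The functions below are a
# minimal, hedged approximation of what they might do, assuming the bulk fetch
# is a plain asyncio.gather over `req` calls and the extraction collects
# relative "/p/<handle>" links with BeautifulSoup. The `_sketch` names and the
# CSS selector are assumptions, not the project's actual implementation.
# ---------------------------------------------------------------------------

async def _makeBulkRequests_sketch(urls: list[str], req: Function, ses: httpx.AsyncClient):
    # Fire all GET requests concurrently; return_exceptions=True keeps failed
    # pages as Exception objects so the caller can skip them (as the loop in
    # getAllProblemUrls does) instead of aborting the whole crawl.
    # `ses` is unused here but kept so the signature matches the call site.
    return await asyncio.gather(*(req(url) for url in urls), return_exceptions=True)


def _extractProblemsFromResponse_sketch(resp: httpx.Response) -> list[str]:
    # Parse the listing page and collect relative problem links ("/p/<handle>").
    soup = BeautifulSoup(resp.text, "html.parser")
    return [a["href"] for a in soup.select("a[href^='/p/']")]


# A possible way to drive getAllProblemUrls (assumed, not shown in this file):
#
#   async def main():
#       async with httpx.AsyncClient() as ses:
#           await getAllProblemUrls(ses.get, ses)
#
#   asyncio.run(main())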