Skip to content

Commit 128e477

Browse files
authored
Merge pull request #5 from pawamoy/fix-403-forbidden
Fix 403 Forbidden by providing most common user agents
2 parents bd19e72 + 6047742 commit 128e477

File tree

3 files changed

+42
-9
lines changed

3 files changed

+42
-9
lines changed

tests/tests.py

+8-5
Original file line numberDiff line numberDiff line change
@@ -6,17 +6,16 @@
66
import unittest
77

88
from lxml import html
9+
from requests import get
910

1011
from tpb.tpb import TPB, Search, Recent, Top, List, Paginated
1112
from tpb.constants import ConstantType, Constants, ORDERS, CATEGORIES
12-
from tpb.utils import URL
13+
from tpb.utils import URL, headers
1314

1415
if sys.version_info >= (3, 0):
15-
from urllib.request import urlopen
1616
from tests.cases import RemoteTestCase
1717
unicode = str
1818
else:
19-
from urllib2 import urlopen
2019
from cases import RemoteTestCase
2120

2221

@@ -106,8 +105,12 @@ def test_creation_dates(self):
106105
self.assertTrue(diff > 1)
107106

108107
def test_torrent_rows(self):
109-
request = urlopen(str(self.torrents.url))
110-
document = html.parse(request)
108+
request = get(
109+
str(self.torrents.url),
110+
headers=headers(),
111+
stream=True
112+
)
113+
document = html.parse(request.raw)
111114
rows = self.torrents._get_torrent_rows(document.getroot())
112115
self.assertEqual(len(rows), 30)
113116

tpb/tpb.py

+4-4
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@
1818
import sys
1919
import time
2020

21-
from .utils import URL
21+
from .utils import URL, headers
2222

2323
from requests import get
2424

@@ -56,7 +56,7 @@ def items(self):
5656
Request URL and parse response. Yield a ``Torrent`` for every torrent
5757
on page.
5858
"""
59-
request = get(str(self.url), headers={'User-Agent' : "Magic Browser","origin_req_host" : "thepiratebay.se"})
59+
request = get(str(self.url), headers=headers())
6060
root = html.fromstring(request.text)
6161
items = [self._build_torrent(row) for row in
6262
self._get_torrent_rows(root)]
@@ -342,7 +342,7 @@ def __init__(self, title, url, category, sub_category, magnet_link,
342342
@property
343343
def info(self):
344344
if self._info is None:
345-
request = get(str(self.url), headers={'User-Agent' : "Magic Browser","origin_req_host" : "thepiratebay.se"})
345+
request = get(str(self.url), headers=headers())
346346
root = html.fromstring(request.text)
347347
info = root.cssselect('#details > .nfo > pre')[0].text_content()
348348
self._info = info
@@ -353,7 +353,7 @@ def files(self):
353353
if not self._files:
354354
path = '/ajax_details_filelist.php?id={id}'.format(id=self.id)
355355
url = self.url.path(path)
356-
request = get(str(url), headers={'User-Agent' : "Magic Browser","origin_req_host" : "thepiratebay.se"})
356+
request = get(str(url), headers=headers())
357357
root = html.fromstring(request.text)
358358
rows = root.findall('.//tr')
359359
for row in rows:

tpb/utils.py

+30
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
from collections import OrderedDict
2+
import random
23

34
from purl import URL as PURL
45

@@ -61,3 +62,32 @@ def _segment(cls, segment):
6162
fget=lambda x: cls._get_segment(x, segment),
6263
fset=lambda x, v: cls._set_segment(x, segment, v),
6364
)
65+
66+
67+
def headers():
    """
    Build the request headers used when fetching The Pirate Bay pages.

    The Pirate Bay rejects requests (403 Forbidden) based on the
    User-Agent header, so a different User-Agent is drawn at random from
    ``USER_AGENTS`` on every call.
    The User-Agent strings were taken from:
    https://techblog.willshouse.com/2012/01/03/most-common-user-agents/

    Returns a new dict with the ``User-Agent`` and ``origin_req_host`` keys.
    """
    # Exactly one random draw per call, so consecutive calls may differ.
    user_agent = random.choice(USER_AGENTS)

    request_headers = dict()
    request_headers["User-Agent"] = user_agent
    request_headers["origin_req_host"] = "thepiratebay.se"
    return request_headers
78+
79+
80+
# Pool of browser User-Agent strings; headers() picks one at random per call.
# Each entry is a single string assembled from adjacent literals.
USER_AGENTS = (
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
    "(KHTML, like Gecko) Chrome/60.0.3112.113 Safari/537.36",

    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
    "(KHTML, like Gecko) Chrome/60.0.3112.101 Safari/537.36",

    "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 "
    "(KHTML, like Gecko) Chrome/60.0.3112.113 Safari/537.36",

    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/537.36 "
    "(KHTML, like Gecko) Chrome/60.0.3112.113 Safari/537.36",
)

0 commit comments

Comments
 (0)