-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathhttp_fetcher.py
128 lines (117 loc) · 4.9 KB
/
http_fetcher.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
import cctime
import prefs
import utils
# Idle cutoff, in milliseconds: once the remote server has sent no data for
# this long, the HTTP response is assumed to be finished.
SILENCE_TIMEOUT = 10 * 1000
# Upper bound on the number of bytes read from the network at a time, to limit
# memory use: 1500 minus the IP header (20) and the TCP header (20).
PACKET_LENGTH = 1500 - (20 + 20)
class HttpFetcher:
    """Incremental HTTP/1.1 GET client driven by repeated read() calls.

    Call go() to begin a fetch, then call read() repeatedly.  Each call
    performs one step of the request (joining, connecting, sending the
    request, parsing the status line, parsing a header line, or reading
    content) and returns zero to PACKET_LENGTH bytes of content, or None.
    Neither b'' nor None indicates EOF; StopIteration does.  The value
    carried by StopIteration is the response ETag (or None if none was
    seen), or 304 when the server replied with status 304.

    The net object must provide step(), join(), connect(host, ssl=...),
    send(data), receive(n), close(), and a state attribute that takes the
    values 'OFFLINE', 'ONLINE' and 'CONNECTED'.
    """

    def __init__(self, net):
        self.net = net
        self.buffer = bytearray()

    def go(self, url, req_etag=None):
        """Begin fetching url, sending req_etag (if any) as If-None-Match.

        Raises ValueError if utils.split_url() yields no host for url.
        """
        self.ssl, self.host, self.path = utils.split_url(url)
        self.req_etag = req_etag
        if not self.host:
            raise ValueError(f'Invalid URL: {url}')
        self.start()

    def start(self):
        """Reset per-request state and make read() begin at connect_read."""
        self.buffer[:] = b''
        self.silence_started = None
        # Reset the response state up front so that a silence timeout that
        # fires before the status line arrives reports no ETag, rather than
        # failing on a missing attribute or reporting a stale value left over
        # from an earlier request.
        self.resp_etag = None
        self.content_length = -1
        self.received_length = 0
        # Calling read() returns anywhere from zero to PACKET_LENGTH bytes, or
        # None; b'' or None does not indicate EOF.  StopIteration indicates EOF.
        self.read = self.connect_read

    def check_silence_timeout(self, is_silent):
        """Track how long the server has been silent and give up if too long.

        Pass is_silent=True when the current step made no progress.  Once
        silence has lasted more than SILENCE_TIMEOUT milliseconds, the socket
        is closed and StopIteration(self.resp_etag) is raised.  Passing
        is_silent=False resets the silence timer.
        """
        if not is_silent:
            self.silence_started = None
            return
        now = cctime.monotonic_millis()
        # Compare against None explicitly: a timestamp of 0 is a valid start.
        if self.silence_started is None:
            self.silence_started = now
            return
        silence = int(now - self.silence_started)
        if silence > SILENCE_TIMEOUT:
            utils.log(f'Closing socket after {silence} ms of silence.')
            self.net.close()
            raise StopIteration(self.resp_etag)

    def connect_read(self):
        """Step the network and act on its state; send the request once connected.

        Returns None.  After the request has been sent, read() is switched to
        http_status_read.
        """
        self.net.step()
        if self.net.state == 'OFFLINE':
            self.net.join()
        elif self.net.state == 'ONLINE':
            # connect() will raise if it fails; there's no risk of a retry loop
            self.net.connect(self.host, ssl=self.ssl)
        elif self.net.state == 'CONNECTED':
            etag = utils.to_bytes(self.req_etag or b'')
            utils.log(f'Fetching {self.path} from {self.host} (ETag {etag}).')
            self.net.send(
                b'GET ' + utils.to_bytes(self.path) + b' HTTP/1.1\r\n' +
                b'Host: ' + utils.to_bytes(self.host) + b'\r\n' +
                b'Connection: Close\r\n' +
                (b'If-None-Match: "' + etag + b'"\r\n' if etag else b'') +
                b'\r\n'
            )
            self.read = self.http_status_read

    def http_status_read(self):
        """Receive data until the status line is complete, then parse it.

        Returns None.  Raises StopIteration(304) on status 304, and ValueError
        on a malformed status line or on any status other than 200, 301 or
        302.  On success, read() is switched to http_headers_read.
        """
        self.net.step()
        data = self.net.receive(PACKET_LENGTH)
        self.buffer.extend(data)
        crlf = self.buffer.find(b'\r\n')
        self.check_silence_timeout(crlf < 0)
        if crlf > 0:
            line = bytes(self.buffer[:crlf])
            parts = line.split(b' ')
            if len(parts) < 2:
                # No space-separated status code; report it the same way as
                # any other unusable status instead of leaking an IndexError.
                raise ValueError(f'Malformed HTTP status line: {line}')
            status = parts[1]
            self.buffer[:crlf + 2] = b''
            utils.log(f'HTTP status: {status}')
            if status == b'304':
                raise StopIteration(304)
            if status != b'200' and status != b'301' and status != b'302':
                raise ValueError(f'HTTP status {status}')
            self.content_length = -1
            self.resp_etag = None
            self.read = self.http_headers_read

    def http_headers_read(self):
        """Consume one header line from the buffer, or receive more data.

        Returns None.  Records Content-Length and ETag, follows a Location
        header by restarting the request, and switches read() to content_read
        when the blank line that ends the headers is reached.
        """
        self.net.step()
        crlf = self.buffer.find(b'\r\n')
        self.check_silence_timeout(crlf < 0)
        if crlf > 0:
            colon = self.buffer.find(b':')
            if 0 < colon < crlf:
                key = bytes(self.buffer[:colon]).lower()
                value = utils.to_str(bytes(self.buffer[colon + 1:crlf])).strip()
                if key == b'content-length':
                    self.content_length = int(value)
                if key == b'etag':
                    self.resp_etag = value.strip('"')
                if key == b'location':
                    self.net.close()
                    utils.log(f'Redirection: {value}')
                    if value.startswith('http:') or value.startswith('https:'):
                        # Keep the conditional-request ETag, exactly as the
                        # relative-redirect branches below do.
                        self.go(value, self.req_etag)
                    elif value.startswith('/'):
                        self.path = value
                        self.start()
                    else:
                        self.path = self.path.rsplit('/', 1)[0] + '/' + value
                        self.start()
                    # start() has already emptied the buffer and reset read().
                    return
            self.buffer[:crlf + 2] = b''
        elif crlf == 0:
            # A blank line terminates the headers; content follows.
            self.buffer[:2] = b''
            self.received_length = 0
            self.read = self.content_read
        else:
            self.buffer.extend(self.net.receive(PACKET_LENGTH))

    def content_read(self):
        """Return the next chunk of content (at most PACKET_LENGTH bytes).

        Raises StopIteration(self.resp_etag) once Content-Length bytes have
        been delivered, or once the connection is no longer 'CONNECTED' and
        no buffered content remains.
        """
        self.net.step()
        # A declared Content-Length of 0 is complete immediately; -1 means the
        # length is unknown and only a close or a silence timeout ends the read.
        if self.received_length >= self.content_length >= 0:
            raise StopIteration(self.resp_etag)
        # Deliver content that arrived together with the headers before
        # looking at the connection state, so that it is not lost when the
        # server has already closed the connection.
        if len(self.buffer):
            chunk = self.buffer[:PACKET_LENGTH]
            self.buffer[:PACKET_LENGTH] = b''
            self.received_length += len(chunk)
            return bytes(chunk)
        if self.net.state != 'CONNECTED':  # server closed the connection
            raise StopIteration(self.resp_etag)
        chunk = self.net.receive(PACKET_LENGTH)
        self.received_length += len(chunk)
        self.check_silence_timeout(len(chunk) == 0)
        return chunk