Skip to content

Commit

Permalink
fix: Remove follow_redirects override in HttpxHttpClient (#1015)
Browse files Browse the repository at this point in the history
### Description

Allow users to disable redirect following for HttpxHttpClient.

### Issues

- Closes: #1013 

### Testing

After removing `follow_redirects=True`, a couple of tests actually
failed; updating the default `kwargs` fixed them.

### Checklist

- [ ] CI passed
  • Loading branch information
2tunnels authored Feb 26, 2025
1 parent daffa44 commit 88afda3
Show file tree
Hide file tree
Showing 2 changed files with 27 additions and 1 deletion.
3 changes: 2 additions & 1 deletion src/crawlee/http_clients/_httpx.py
Original file line number Diff line number Diff line change
Expand Up @@ -163,7 +163,7 @@ async def crawl(
)

try:
response = await client.send(http_request, follow_redirects=True)
response = await client.send(http_request)
except httpx.TransportError as exc:
if self._is_proxy_error(exc):
raise ProxyError from exc
Expand Down Expand Up @@ -235,6 +235,7 @@ def _get_client(self, proxy_url: str | None) -> httpx.AsyncClient:
'proxy': proxy_url,
'http1': self._http1,
'http2': self._http2,
'follow_redirects': True,
}

# Update the default kwargs with any additional user-provided kwargs.
Expand Down
25 changes: 25 additions & 0 deletions tests/unit/http_clients/test_httpx.py
Original file line number Diff line number Diff line change
Expand Up @@ -106,3 +106,28 @@ async def test_common_headers_and_user_agent(httpbin: URL, header_network: dict)
assert 'User-Agent' in response_headers
assert 'python-httpx' not in response_headers['User-Agent']
assert response_headers['User-Agent'] in get_available_header_values(header_network, {'User-Agent', 'user-agent'})


# Verify that crawling a redirecting URL with the fixture-provided client ends on the redirect target.
# NOTE(review): `http_client` comes from a fixture not shown here; presumably it is built with default settings.
async def test_crawl_follow_redirects_by_default(http_client: HttpxHttpClient, httpbin: URL) -> None:
# Build a `redirect-to` URL whose `url` query parameter points at the `get` endpoint.
final_url = str(httpbin / 'get')
redirect_url = str((httpbin / 'redirect-to').with_query(url=final_url))
request = Request.from_url(redirect_url)

crawling_result = await http_client.crawl(request)

# The redirect must have been followed: final response is 200 and the request records the target URL.
assert crawling_result.http_response.status_code == 200
assert request.loaded_url == final_url


# Verify that a client created with `follow_redirects=False` returns the redirect response itself.
async def test_crawl_follow_redirects_false(httpbin: URL) -> None:
# Explicitly opt out of redirect following via the constructor keyword argument.
http_client = HttpxHttpClient(follow_redirects=False)

# Build a `redirect-to` URL whose `url` query parameter points at the `get` endpoint.
final_url = str(httpbin / 'get')
redirect_url = str((httpbin / 'redirect-to').with_query(url=final_url))
request = Request.from_url(redirect_url)

crawling_result = await http_client.crawl(request)

# The redirect must NOT have been followed: the 302 is returned as-is, its `Location` header
# names the target, and the request still records the original redirecting URL.
assert crawling_result.http_response.status_code == 302
assert crawling_result.http_response.headers['Location'] == final_url
assert request.loaded_url == redirect_url

0 comments on commit 88afda3

Please sign in to comment.