Skip to content
This repository was archived by the owner on Apr 26, 2024. It is now read-only.

Fix IP URL previews on Python 3 #4215

Merged
merged 31 commits into from
Dec 21, 2018
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
31 commits
Select commit Hold shift + click to select a range
9e0ae2f
fix
hawkowl Nov 21, 2018
1474953
changelog
hawkowl Nov 21, 2018
31e0fd9
fixes
hawkowl Nov 21, 2018
1520252
fixes
hawkowl Nov 21, 2018
ec14bca
fixes
hawkowl Nov 21, 2018
7af04df
fix pep8
hawkowl Nov 21, 2018
606e39b
fix pep8
hawkowl Nov 21, 2018
759169b
add some code coverage to blacklists
hawkowl Nov 22, 2018
a2f7ae1
tests
hawkowl Nov 26, 2018
a94ca42
Merge remote-tracking branch 'origin/develop' into hawkowl/ip-preview
hawkowl Nov 26, 2018
ca82572
fix pep8
hawkowl Nov 26, 2018
4c246c0
Merge remote-tracking branch 'origin/develop' into hawkowl/ip-preview
hawkowl Nov 28, 2018
fc1dd48
cleanup
hawkowl Nov 28, 2018
c70d2a1
log when we block
hawkowl Nov 30, 2018
5756f64
docstring
hawkowl Nov 30, 2018
0421f0b
docstring
hawkowl Nov 30, 2018
e59a5ee
comment
hawkowl Nov 30, 2018
4357b85
Merge remote-tracking branch 'origin/develop' into hawkowl/ip-preview
hawkowl Dec 5, 2018
d82e498
fix
hawkowl Dec 5, 2018
1210131
merge in tests
hawkowl Dec 5, 2018
6fade49
fix
hawkowl Dec 5, 2018
168a494
fix
hawkowl Dec 5, 2018
920625f
fix
hawkowl Dec 5, 2018
7d17b47
fix py2
hawkowl Dec 5, 2018
1d40dd5
Merge remote-tracking branch 'origin/develop' into hawkowl/ip-preview
hawkowl Dec 14, 2018
79a5655
fix missing hyperlink
hawkowl Dec 14, 2018
aac9bb4
Merge remote-tracking branch 'origin/develop' into hawkowl/ip-preview
hawkowl Dec 20, 2018
34952ce
review comments
hawkowl Dec 20, 2018
9e64ab8
review comments
hawkowl Dec 20, 2018
3b9e190
urllib does what we need here
hawkowl Dec 20, 2018
cfa8f6f
Merge branch 'develop' into hawkowl/ip-preview
hawkowl Dec 21, 2018
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
review comments
  • Loading branch information
hawkowl committed Dec 20, 2018
commit 34952cefe719eb03e004913499778c353f119b92
132 changes: 79 additions & 53 deletions synapse/http/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,18 +52,35 @@
)


def check_against_blacklist(ip_address, whitelist, blacklist):
if ip_address in blacklist:
if whitelist is None or ip_address not in whitelist:
def check_against_blacklist(ip_address, ip_whitelist, ip_blacklist):
"""
Args:
ip_address (netaddr.IPAddress)
ip_whitelist (netaddr.IPSet)
ip_blacklist (netaddr.IPSet)
"""
if ip_address in ip_blacklist:
if ip_whitelist is None or ip_address not in ip_whitelist:
return True
return False


class IPBlacklistingResolver(object):
def __init__(self, reactor, whitelist, blacklist):
"""
A proxy for reactor.nameResolver which only produces non-blacklisted IP
addresses, preventing DNS rebinding attacks on URL preview.
"""

def __init__(self, reactor, ip_whitelist, ip_blacklist):
"""
Args:
reactor (twisted.internet.reactor)
ip_whitelist (netaddr.IPSet)
ip_blacklist (netaddr.IPSet)
"""
self._reactor = reactor
self._whitelist = whitelist
self._blacklist = blacklist
self._ip_whitelist = ip_whitelist
self._ip_blacklist = ip_blacklist

def resolveHostName(self, recv, hostname, portNumber=0):

Expand All @@ -82,7 +99,7 @@ def addressResolved(address):
ip_address = IPAddress(address.host)

if check_against_blacklist(
ip_address, self._whitelist, self._blacklist
ip_address, self._ip_whitelist, self._ip_blacklist
):
logger.info(
"Dropped %s from DNS resolution to %s" % (ip_address, hostname)
Expand Down Expand Up @@ -111,10 +128,22 @@ def _callback(addrs):


class BlacklistingAgentWrapper(Agent):
def __init__(self, agent, reactor, whitelist=None, blacklist=None):
"""
An Agent wrapper which will prevent access to IP addresses being accessed
directly (without an IP address lookup).
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not really clear what you mean by "an IP address lookup" here.

Is this just an optimisation? I'm not sure the extra complexity it introduces makes it worthwhile if so.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Accessing an IP address directly means that it doesn't do a DNS lookup for said IP address.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

right, so it never hits the IPBlacklistingResolver? Is this not a problem if we get a 302 redirect to an IP address?

"""

def __init__(self, agent, reactor, ip_whitelist=None, ip_blacklist=None):
"""
Args:
agent (twisted.web.client.Agent): The Agent to wrap.
reactor (twisted.internet.reactor)
ip_whitelist (netaddr.IPSet)
ip_blacklist (netaddr.IPSet)
"""
self._agent = agent
self._whitelist = whitelist
self._blacklist = blacklist
self._ip_whitelist = ip_whitelist
self._ip_blacklist = ip_blacklist

def request(self, method, uri, headers=None, bodyProducer=None):
from hyperlink import URL
Expand All @@ -124,7 +153,9 @@ def request(self, method, uri, headers=None, bodyProducer=None):
try:
ip_address = IPAddress(h.host)

if check_against_blacklist(ip_address, self._whitelist, self._blacklist):
if check_against_blacklist(
ip_address, self._ip_whitelist, self._ip_blacklist
):
logger.info(
"Blocking access to %s because of blacklist" % (ip_address,)
)
Expand All @@ -145,58 +176,35 @@ class SimpleHttpClient(object):
using HTTP in Matrix
"""

def __init__(self, hs, treq_args={}, whitelist=None, blacklist=None):
def __init__(self, hs, treq_args={}, ip_whitelist=None, ip_blacklist=None):
"""
Args:
hs (synapse.server.HomeServer)
treq_args (dict): Extra keyword arguments to be given to treq.request.
blacklist (netaddr.IPSet): The IP addresses that are blacklisted that
ip_blacklist (netaddr.IPSet): The IP addresses that are blacklisted that
we may not request.
whitelist (netaddr.IPSet): The whitelisted IP addresses, that we can
ip_whitelist (netaddr.IPSet): The whitelisted IP addresses, that we can
request if it were otherwise caught in a blacklist.
"""
self.hs = hs

self._whitelist = whitelist
self._blacklist = blacklist
self._ip_whitelist = ip_whitelist
self._ip_blacklist = ip_blacklist
self._extra_treq_args = treq_args

self.user_agent = hs.version_string
self.clock = hs.get_clock()
self.reactor = hs.get_reactor()
if hs.config.user_agent_suffix:
self.user_agent = "%s %s" % (self.user_agent, hs.config.user_agent_suffix)

self.user_agent = self.user_agent.encode('ascii')
self._make_agent()

def _make_agent(self, _agent=False):

if _agent:
self.agent = _agent
else:
# the pusher makes lots of concurrent SSL connections to sygnal, and
# tends to do so in batches, so we need to allow the pool to keep
# lots of idle connections around.
pool = HTTPConnectionPool(self.reactor)
pool.maxPersistentPerHost = max((100 * CACHE_SIZE_FACTOR, 5))
pool.cachedConnectionTimeout = 2 * 60

# The default context factory in Twisted 14.0.0 (which we require) is
# BrowserLikePolicyForHTTPS which will do regular cert validation
# 'like a browser'
self.agent = Agent(
self.reactor,
connectTimeout=15,
contextFactory=self.hs.get_http_client_context_factory(),
pool=pool,
)

# If we have an IP blacklist, use the blacklisting Agent wrapper.
if self._blacklist:

if self._ip_blacklist:
real_reactor = hs.get_reactor()
# If we have an IP blacklist, we need to use a DNS resolver which
# filters out blacklisted IP addresses, to prevent DNS rebinding.
nameResolver = IPBlacklistingResolver(
self.reactor, self._whitelist, self._blacklist
real_reactor, self._ip_whitelist, self._ip_blacklist
)

@implementer(IReactorPluggableNameResolver)
Expand All @@ -205,20 +213,38 @@ def __getattr__(_self, attr):
if attr == "nameResolver":
return nameResolver
else:
return getattr(self.reactor, attr)
return getattr(real_reactor, attr)

self.agent = Agent(
Reactor(),
connectTimeout=15,
contextFactory=self.hs.get_http_client_context_factory(),
pool=pool,
)
self.reactor = Reactor()
else:
self.reactor = hs.get_reactor()

# the pusher makes lots of concurrent SSL connections to sygnal, and
# tends to do so in batches, so we need to allow the pool to keep
# lots of idle connections around.
pool = HTTPConnectionPool(self.reactor)
pool.maxPersistentPerHost = max((100 * CACHE_SIZE_FACTOR, 5))
pool.cachedConnectionTimeout = 2 * 60

# The default context factory in Twisted 14.0.0 (which we require) is
# BrowserLikePolicyForHTTPS which will do regular cert validation
# 'like a browser'
self.agent = Agent(
self.reactor,
connectTimeout=15,
contextFactory=self.hs.get_http_client_context_factory(),
pool=pool,
)

if self._ip_blacklist:
# If we have an IP blacklist, we then install the blacklisting Agent
# which prevents direct access to IP addresses, that are not caught
# by the DNS resolution.
self.agent = BlacklistingAgentWrapper(
self.agent,
self.reactor,
whitelist=self._whitelist,
blacklist=self._blacklist,
ip_whitelist=self._ip_whitelist,
ip_blacklist=self._ip_blacklist,
)

@defer.inlineCallbacks
Expand Down
4 changes: 2 additions & 2 deletions synapse/rest/media/v1/preview_url_resource.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,8 +72,8 @@ def __init__(self, hs, media_repo, media_storage):
self.client = SimpleHttpClient(
hs,
treq_args={"browser_like_redirects": True},
whitelist=hs.config.url_preview_ip_range_whitelist,
blacklist=hs.config.url_preview_ip_range_blacklist,
ip_whitelist=hs.config.url_preview_ip_range_whitelist,
ip_blacklist=hs.config.url_preview_ip_range_blacklist,
)
self.media_repo = media_repo
self.primary_base_path = media_repo.primary_base_path
Expand Down