Skip to content
This repository has been archived by the owner on Jul 11, 2022. It is now read-only.

Added Python3 support #43

Closed
wants to merge 7 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,10 @@ matrix:
env: COVER=1
- python: '2.7'
env: CROSSDOCK=1
- python: '3.6'
env: COVER=1
- python: '3.6'
env: CROSSDOCK=1

services:
- docker
Expand Down
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ shell:

# Generate jaeger thrifts
THRIFT_GEN_DIR=jaeger_client/thrift_gen
THRIFT_VER=0.9.3
THRIFT_VER=0.10.0
THRIFT_IMG=thrift:$(THRIFT_VER)
THRIFT_PY_ARGS=new_style,tornado
THRIFT=docker run -v "${PWD}:/data" $(THRIFT_IMG) thrift
Expand Down
5 changes: 4 additions & 1 deletion crossdock/server/endtoend.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
from builtins import range
from builtins import object
import tornado.web
import json
import six

from jaeger_client.local_agent_net import LocalAgentSender
from jaeger_client.config import (
Expand Down Expand Up @@ -99,7 +102,7 @@ def generate_traces(self, request, response_writer):
tracer = self.tracers[sampler_type]
for _ in range(req.get('count', 0)):
span = tracer.start_span(req['operation'])
for k, v in req.get('tags', {}).iteritems():
for k, v in six.iteritems(req.get('tags', {})):
span.set_tag(k, v)
span.finish()
response_writer.finish()
4 changes: 3 additions & 1 deletion crossdock/server/serializer.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
from builtins import str
import json
import logging
import six

from crossdock.thrift_gen.tracetest.ttypes import JoinTraceRequest, StartTraceRequest, \
Downstream, Transport, TraceResponse, ObservedSpan
Expand Down Expand Up @@ -93,7 +95,7 @@ def traced_service_object_to_json(obj):


def set_traced_service_object_values(obj, values, downstream_func):
for k in values.iterkeys():
for k in six.iterkeys(values):
if hasattr(obj, k):
if k == 'downstream':
if values[k] is not None:
Expand Down
1 change: 1 addition & 0 deletions crossdock/server/server.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
from builtins import object
import logging

import tornado.web
Expand Down
29 changes: 17 additions & 12 deletions jaeger_client/codecs.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,13 @@
# THE SOFTWARE.

from __future__ import absolute_import
from future import standard_library
standard_library.install_aliases()
from past.builtins import basestring
from builtins import object

import urllib
import urllib.request, urllib.parse, urllib.error
import six

from opentracing import (
InvalidCarrierException,
Expand Down Expand Up @@ -64,37 +69,37 @@ def inject(self, span_context, carrier):
parent_id=span_context.parent_id, flags=span_context.flags)
baggage = span_context.baggage
if baggage:
for key, value in baggage.iteritems():
for key, value in six.iteritems(baggage):
if self.url_encoding:
encoded_value = urllib.quote(value)
encoded_value = urllib.parse.quote(value)
else:
encoded_value = value
carrier['%s%s' % (self.baggage_prefix, key)] = encoded_value

def extract(self, carrier):
if not hasattr(carrier, 'iteritems'):
if not isinstance(carrier, dict):
raise InvalidCarrierException('carrier not a collection')
trace_id, span_id, parent_id, flags = None, None, None, None
baggage = None
debug_id = None
for key, value in carrier.iteritems():
for key, value in six.iteritems(carrier):
uc_key = key.lower()
if uc_key == self.trace_id_header:
if self.url_encoding:
value = urllib.unquote(value)
value = urllib.parse.unquote(value)
trace_id, span_id, parent_id, flags = \
span_context_from_string(value)
elif uc_key.startswith(self.baggage_prefix):
if self.url_encoding:
value = urllib.unquote(value)
value = urllib.parse.unquote(value)
attr_key = key[self.prefix_length:]
if baggage is None:
baggage = {attr_key.lower(): value}
else:
baggage[attr_key.lower()] = value
elif uc_key == self.debug_id_header:
if self.url_encoding:
value = urllib.unquote(value)
value = urllib.parse.unquote(value)
debug_id = value
if not trace_id and baggage:
raise SpanContextCorruptedException('baggage without trace ctx')
Expand Down Expand Up @@ -137,7 +142,7 @@ def span_context_to_string(trace_id, span_id, parent_id, flags):
:param parent_id:
:param flags:
"""
parent_id = parent_id or 0L
parent_id = parent_id or 0
return '{:x}:{:x}:{:x}:{:x}'.format(trace_id, span_id, parent_id, flags)


Expand All @@ -162,9 +167,9 @@ def span_context_from_string(value):
raise SpanContextCorruptedException(
'malformed trace context "%s"' % value)
try:
trace_id = long(parts[0], 16)
span_id = long(parts[1], 16)
parent_id = long(parts[2], 16)
trace_id = int(parts[0], 16)
span_id = int(parts[1], 16)
parent_id = int(parts[2], 16)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

why is int equivalent to long? isn't it dependent on the architecture?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

As far as I understand, int and long types were pretty much unified as of Python 2.4: https://www.python.org/dev/peps/pep-0237/. In Python 3 any remaining distinction was erased: there is no long() function and no L suffix for long integer literals.

Here is an output from my 2.7 console:

Python 2.7.12 (default, Oct 11 2016, 05:20:59)

>>> int("1000000000000000000", 16)
4722366482869645213696L
>>> long("1000000000000000000", 16)
4722366482869645213696L

>>> long("10", 16)
16L
>>> int("10", 16)
16

This also means that replacing _max_unsigned_id = (1L << 64) with _max_unsigned_id = (1 << 64) somewhere else in this PR is also probably safe:

Python 2.7.12 (default, Oct 11 2016, 05:20:59)

>>> 1L << 64
18446744073709551616L
>>> 1 << 64
18446744073709551616L

flags = int(parts[3], 16)
if trace_id < 1 or span_id < 1 or parent_id < 0 or flags < 0:
raise SpanContextCorruptedException(
Expand Down
1 change: 1 addition & 0 deletions jaeger_client/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
# THE SOFTWARE.

from __future__ import absolute_import
from builtins import object

import logging
import threading
Expand Down
4 changes: 2 additions & 2 deletions jaeger_client/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,10 +33,10 @@
DEFAULT_FLUSH_INTERVAL = 1

# Name of the HTTP header used to encode trace ID
TRACE_ID_HEADER = b'uber-trace-id'
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I remember we explicitly ran into an issue with this string being Unicode in some instrumentation of urllib2. Why can we not keep this as b?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I believe in Python 2.7 string and bytes types are essentially equivalent:

Python 2.7.12 (default, Oct 11 2016, 05:20:59)

>>> a = 'helloпривет'
>>> b = b'helloпривет'
>>> a
'hello\xd0\xbf\xd1\x80\xd0\xb8\xd0\xb2\xd0\xb5\xd1\x82'
>>> b
'hello\xd0\xbf\xd1\x80\xd0\xb8\xd0\xb2\xd0\xb5\xd1\x82'
>>> type(a)
<type 'str'>
>>> type(b)
<type 'str'>

They are different in Python 3 though:

>>> a='helloпривет'
>>> b=b'helloпривет'
  File "<stdin>", line 1
SyntaxError: bytes can only contain ASCII literal characters.
>>> b=b'hello'
>>> a
'helloпривет'
>>> b
b'hello'
>>> type(a)
<class 'str'>
>>> type(b)
<class 'bytes'>

So, with this string being explicitly marked as bytes, I had the following error when running tests in Python3:

tests/test_tracer.py:178: in test_tracer_tags_hostname
    t = Tracer(service_name='x', reporter=reporter, sampler=sampler)
jaeger_client/tracer.py:62: in __init__
    debug_id_header=debug_id_header,
jaeger_client/codecs.py:56: in __init__
    self.trace_id_header = trace_id_header.lower().replace('_', '-')
E   TypeError: a bytes-like object is required, not 'str'

Which makes sense, because '_' is a string object, not bytes, in Python 3. Changing trace_id_header.lower().replace('_', '-') to trace_id_header.lower().replace(b'_', b'-') fixed this test, but broke a few other ones which were using a trace id string without the b prefix:

    def test_context_from_readable_headers(self):
        # provide headers all the way through Config object
        config = Config(
            service_name='test',
            config={
                'trace_id_header': 'Trace_ID',
                'baggage_header_prefix': 'Trace-Attr-',
            })
        tracer = config.create_tracer(
...

tests/test_codecs.py:170: in test_context_from_readable_headers
    sampler=ConstSampler(True),
jaeger_client/config.py:279: in create_tracer
    debug_id_header=self.debug_id_header,
jaeger_client/tracer.py:62: in __init__
    debug_id_header=debug_id_header,
jaeger_client/codecs.py:56: in __init__
    self.trace_id_header = trace_id_header.lower().replace(b'_', b'-')
E   TypeError: replace() argument 1 must be str, not bytes

So, weighing the options of changing every affected test versus changing just the single line TRACE_ID_HEADER = b'uber-trace-id', I've chosen the one with the fewest changes.

Regarding the urllib2 issue, I'm not sure what could have caused it, given that bytes and strings are the same in Py2 – maybe you have an example?

Copy link
Member

@yurishkuro yurishkuro May 5, 2017

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I found these comments in the commit when we changed headers to b:

Force plain (non-unicode) strings
Summary:
image upload was going through multipart form submission code path previously untested with Jaeger, and was failing with 'utf8' codec can't decode byte 0xff in position 152: invalid start byte. Turns out it was due to httplib getting confused on unicode strings used as Jaeger headers. This change forces those headers to be plain strings.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

And this was the error stack trace

File "opentracing_instrumentation/client_hooks/urllib2.py", line 97, in https_open
    return self.do_open(req, httplib.HTTPSConnection)
  File "opentracing_instrumentation/client_hooks/urllib2.py", line 54, in do_open
    resp = urllib2.AbstractHTTPHandler.do_open(self, conn, req)
  File "python2.7/urllib2.py", line 1174, in do_open
    h.request(req.get_method(), req.get_selector(), req.data, headers)
  File "python2.7/httplib.py", line 966, in request
    self._send_request(method, url, body, headers)
  File "python2.7/httplib.py", line 1000, in _send_request
    self.endheaders(body)
  File "python2.7/httplib.py", line 962, in endheaders
    self._send_output(message_body)
  File "python2.7/httplib.py", line 820, in _send_output
    msg += message_body

The += is what was failing when the headers were defined as unicode strings.

Copy link
Member

@yurishkuro yurishkuro May 5, 2017

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I guess I should've written a test for that (facepalm)

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@yurishkuro Seems like I can't reproduce the error, so a test case would be beneficial...

I'm also not sure how switching 'uber-trace-id' to b'uber-trace-id' alone would help – to my knowledge these two should be equivalent in Python 2.

Here is what I've tried to do (with some variations):

# coding=utf-8

import urllib2

headers = {
    'User-Agent': 'Mozilla/5.0',
    'header_хидер': (u'value_значение').encode('utf-8'),
    'header_klüft_skräms_große': (u'À quelle fréquence envoyez-vous des données étranges?').encode('utf-8')
}

body = 'uber-trace-id'

r = urllib2.Request('http://localhost:11111', data=body, headers=headers)
response = urllib2.urlopen(r)
data = response.read()

print(response)
print('\n\n----------------------\n\n')
print(data)

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So here's the scenario in Python 2.7, which I believe reflects what was happening:

>>> x=b'x'
>>> x
'x'
>>> y=u'y'
>>> y
u'y'
>>> b=bytes(chr(255))
>>> b
'\xff'
>>> x+b
'x\xff'
>>> y+b
Traceback (most recent call last):
  File "<stdin>", line 1, in <module>
UnicodeDecodeError: 'ascii' codec can't decode byte 0xff in position 0: ordinal not in range(128)

The code in httplib.py looks like this

def _send_output(self, message_body=None):
        . . .
        if isinstance(message_body, str):
            msg += message_body

Now, I don't know why message_body contained a byte sequence that wasn't valid Unicode, but it did happen in production code (as I mentioned, in the image upload, fwiw). It's my understanding that normally that HTTP request's buffer was composed of non-Unicode strings, which is shown in the example to be OK with concatenating a non-Unicode sequence (x+b). However, when tracing headers were added and those headers were defined without the b prefix, they were automatically converted into u strings. When they were appended to the buffer, it turned the buffer into u, e.g.

>>> x+y
u'xy'

And then later when the body with invalid seq is appended, Python tries to parse it as Unicode and blows up.

For this reason I had to declare the headers as b, non-Unicode strings. It may not be an issue in Python3, but will definitely be an issue in Python 2.7. So perhaps these assignments can be made conditional using six.PY3

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Trying to fix this in #109

TRACE_ID_HEADER = 'uber-trace-id'

# Prefix for HTTP headers used to record baggage items
BAGGAGE_HEADER_PREFIX = b'uberctx-'
BAGGAGE_HEADER_PREFIX = 'uberctx-'

# The name of HTTP header or a TextMap carrier key which, if found in the
# carrier, forces the trace to be sampled as "debug" trace. The value of the
Expand Down
1 change: 1 addition & 0 deletions jaeger_client/local_agent_net.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
# THE SOFTWARE.

from __future__ import absolute_import
from builtins import object
from threadloop import ThreadLoop
import tornado
import tornado.httpclient
Expand Down
1 change: 1 addition & 0 deletions jaeger_client/metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
# THE SOFTWARE.

from __future__ import absolute_import
from builtins import object


class Metrics(object):
Expand Down
1 change: 1 addition & 0 deletions jaeger_client/rate_limiter.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
from builtins import object
# Copyright (c) 2017 Uber Technologies, Inc.
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
Expand Down
1 change: 1 addition & 0 deletions jaeger_client/reporter.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
# THE SOFTWARE.

from __future__ import absolute_import
from builtins import object
import logging
import threading

Expand Down
12 changes: 8 additions & 4 deletions jaeger_client/sampler.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,13 @@
# THE SOFTWARE.

from __future__ import absolute_import
from __future__ import division
from builtins import object
from past.utils import old_div
import logging
import random
import json
import six

from threading import Lock
from tornado.ioloop import PeriodicCallback
Expand All @@ -45,7 +49,7 @@
SAMPLER_TYPE_TAG_KEY = 'sampler.type'
SAMPLER_PARAM_TAG_KEY = 'sampler.param'
DEFAULT_SAMPLING_PROBABILITY = 0.001
DEFAULT_LOWER_BOUND = 1.0 / (10.0 * 60.0) # sample once every 10 minutes
DEFAULT_LOWER_BOUND = old_div(1.0, (10.0 * 60.0)) # sample once every 10 minutes
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What's the reason for this change? Doesn't Python3 work with 1.0 / (10.0 * 60.0)?

I'd like to have comments in the code for non-trivial decisions.

DEFAULT_MAX_OPERATIONS = 2000

STRATEGIES_STR = 'perOperationStrategies'
Expand Down Expand Up @@ -235,7 +239,7 @@ def update(self, lower_bound, rate):

def __str__(self):
return 'GuaranteedThroughputProbabilisticSampler(%s, %s, %s)' \
% (self.operation, self.rate, self.lower_bound)
% (self.operation, self.rate, round(float(self.lower_bound), 14))


class AdaptiveSampler(Sampler):
Expand Down Expand Up @@ -306,12 +310,12 @@ def update(self, strategies):
ProbabilisticSampler(self.default_sampling_probability)

def close(self):
for _, sampler in self.samplers.iteritems():
for _, sampler in six.iteritems(self.samplers):
sampler.close()

def __str__(self):
return 'AdaptiveSampler(%s, %s, %s)' \
% (self.default_sampling_probability, self.lower_bound,
% (self.default_sampling_probability, round(float(self.lower_bound), 14),
self.max_operations)


Expand Down
4 changes: 3 additions & 1 deletion jaeger_client/span.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,10 +19,12 @@
# THE SOFTWARE.

from __future__ import absolute_import
from builtins import str

import json
import threading
import time
import six

import opentracing
from opentracing.ext import tags as ext_tags
Expand Down Expand Up @@ -54,7 +56,7 @@ def __init__(self, context, tracer, operation_name,
self.tags = []
self.logs = []
if tags:
for k, v in tags.iteritems():
for k, v in six.iteritems(tags):
self.set_tag(k, v)

def set_operation_name(self, operation_name):
Expand Down
25 changes: 19 additions & 6 deletions jaeger_client/thrift.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
from past.builtins import basestring
# Copyright (c) 2016 Uber Technologies, Inc.
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
Expand All @@ -20,6 +21,7 @@

import socket
import struct
import sys

import jaeger_client.thrift_gen.zipkincore.ZipkinCollector as zipkin_collector
import jaeger_client.thrift_gen.sampling.SamplingManager as sampling_manager
Expand All @@ -30,8 +32,12 @@

_max_signed_port = (1 << 15) - 1
_max_unsigned_port = (1 << 16)
_max_signed_id = (1L << 63) - 1
_max_unsigned_id = (1L << 64)
_max_signed_id = (1 << 63) - 1
_max_unsigned_id = (1 << 64)


def str_to_binary(value):
    """
    Return ``value`` in the byte-string form used for binary annotation values.

    On Python 2 the value is returned untouched. On Python 3 text is encoded
    as UTF-8, while a value that is already ``bytes`` is passed through as-is
    (``bytes`` has no ``encode`` method, so encoding it would raise
    ``AttributeError``).

    :param value: text or byte string
    :return: ``value`` itself on Python 2 or when it is already ``bytes``,
        otherwise its UTF-8 encoding
    """
    if sys.version_info[0] == 2 or isinstance(value, bytes):
        return value
    return value.encode('utf-8')
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

if this is checking the python version, I would rather use six.PY2 constant.



def ipv4_to_int(ipv4):
Expand Down Expand Up @@ -61,6 +67,12 @@ def port_to_int(port):
def id_to_int(big_id):
    """
    Convert an ID into the signed 64-bit range.

    zipkincore.thrift defines ID fields as i64, which is signed, so IDs
    above ``_max_signed_id`` are shifted down by ``_max_unsigned_id`` and
    come out negative.

    :param big_id: ID as an integer, or None
    :return: None when ``big_id`` is None, otherwise the (possibly
        negative) integer
    """
    # Python 2 evaluates ``None > 1`` to False, but Python 3 rejects the
    # comparison, so None has to be passed through before comparing.
    if big_id is None:
        return None

    return big_id - _max_unsigned_id if big_id > _max_signed_id else big_id
Expand All @@ -78,8 +90,9 @@ def make_endpoint(ipv4, port, service_name):
def make_string_tag(key, value):
if len(value) > 256:
value = value[:256]

return zipkin_collector.BinaryAnnotation(
key, value, zipkin_collector.AnnotationType.STRING)
key, str_to_binary(value), zipkin_collector.AnnotationType.STRING)


def make_peer_address_tag(key, host):
Expand All @@ -90,7 +103,7 @@ def make_peer_address_tag(key, host):
:param host:
"""
return zipkin_collector.BinaryAnnotation(
key, '0x01', zipkin_collector.AnnotationType.BOOL, host)
key, str_to_binary('0x01'), zipkin_collector.AnnotationType.BOOL, host)


def make_local_component_tag(component_name, endpoint):
Expand All @@ -100,7 +113,7 @@ def make_local_component_tag(component_name, endpoint):
:param endpoint:
"""
return zipkin_collector.BinaryAnnotation(
key=LOCAL_COMPONENT, value=component_name,
key=LOCAL_COMPONENT, value=str_to_binary(component_name),
annotation_type=zipkin_collector.AnnotationType.STRING,
host=endpoint)

Expand All @@ -117,7 +130,7 @@ def timestamp_micros(ts):
:param ts:
:return:
"""
return long(ts * 1000000)
return int(ts * 1000000)


def make_zipkin_spans(spans):
Expand Down
Loading