From 5db7df77cf7dc767a248663a6281da37fce833b1 Mon Sep 17 00:00:00 2001 From: Tomas Ehrlich Date: Mon, 20 Aug 2012 13:36:36 +0200 Subject: [PATCH 1/2] Added collation as configurable parameter. --- sql_server/pyodbc/base.py | 75 ++++++++++++++++++++------------------- 1 file changed, 39 insertions(+), 36 deletions(-) diff --git a/sql_server/pyodbc/base.py b/sql_server/pyodbc/base.py index c9acdcc5..845a5b5b 100644 --- a/sql_server/pyodbc/base.py +++ b/sql_server/pyodbc/base.py @@ -50,15 +50,15 @@ warnings.filterwarnings('error', 'The DATABASE_ODBC.+ is deprecated', DeprecationWarning, __name__, 0) -collation = 'Latin1_General_CI_AS' +default_collation = 'Latin1_General_CI_AS' if hasattr(settings, 'DATABASE_COLLATION'): warnings.warn( "The DATABASE_COLLATION setting is going to be deprecated, use DATABASE_OPTIONS['collation'] instead.", DeprecationWarning ) - collation = settings.DATABASE_COLLATION + default_collation = settings.DATABASE_COLLATION elif hasattr(settings, 'DATABASE_OPTIONS') and 'collation' in settings.DATABASE_OPTIONS: - collation = settings.DATABASE_OPTIONS['collation'] + default_collation = settings.DATABASE_OPTIONS['collation'] deprecated = ( ('DATABASE_ODBC_DRIVER', 'driver'), @@ -91,39 +91,6 @@ class DatabaseWrapper(BaseDatabaseWrapper): unicode_results = False datefirst = 7 - # Collations: http://msdn2.microsoft.com/en-us/library/ms184391.aspx - # http://msdn2.microsoft.com/en-us/library/ms179886.aspx - # T-SQL LIKE: http://msdn2.microsoft.com/en-us/library/ms179859.aspx - # Full-Text search: http://msdn2.microsoft.com/en-us/library/ms142571.aspx - # CONTAINS: http://msdn2.microsoft.com/en-us/library/ms187787.aspx - # FREETEXT: http://msdn2.microsoft.com/en-us/library/ms176078.aspx - - operators = { - # Since '=' is used not only for string comparision there is no way - # to make it case (in)sensitive. It will simply fallback to the - # database collation. 
- 'exact': '= %s', - 'iexact': "= UPPER(%s)", - 'contains': "LIKE %s ESCAPE '\\' COLLATE " + collation, - 'icontains': "LIKE UPPER(%s) ESCAPE '\\' COLLATE "+ collation, - 'gt': '> %s', - 'gte': '>= %s', - 'lt': '< %s', - 'lte': '<= %s', - 'startswith': "LIKE %s ESCAPE '\\' COLLATE " + collation, - 'endswith': "LIKE %s ESCAPE '\\' COLLATE " + collation, - 'istartswith': "LIKE UPPER(%s) ESCAPE '\\' COLLATE " + collation, - 'iendswith': "LIKE UPPER(%s) ESCAPE '\\' COLLATE " + collation, - - # TODO: remove, keep native T-SQL LIKE wildcards support - # or use a "compatibility layer" and replace '*' with '%' - # and '.' with '_' - 'regex': 'LIKE %s COLLATE ' + collation, - 'iregex': 'LIKE %s COLLATE ' + collation, - - # TODO: freetext, full-text contains... - } - def __init__(self, *args, **kwargs): super(DatabaseWrapper, self).__init__(*args, **kwargs) @@ -220,6 +187,8 @@ def _cursor(self): else: cstr_parts.append('SERVERNAME=%s' % host_str) + self.collation = options.get('collation', default_collation) + if user_str: cstr_parts.append('UID=%s;PWD=%s' % (user_str, passwd_str)) else: @@ -279,6 +248,40 @@ def _cursor(self): return CursorWrapper(cursor, self.driver_needs_utf8) + # Collations: http://msdn2.microsoft.com/en-us/library/ms184391.aspx + # http://msdn2.microsoft.com/en-us/library/ms179886.aspx + # T-SQL LIKE: http://msdn2.microsoft.com/en-us/library/ms179859.aspx + # Full-Text search: http://msdn2.microsoft.com/en-us/library/ms142571.aspx + # CONTAINS: http://msdn2.microsoft.com/en-us/library/ms187787.aspx + # FREETEXT: http://msdn2.microsoft.com/en-us/library/ms176078.aspx + @property + def operators(self): + return { + # Since '=' is used not only for string comparison there is no way + # to make it case (in)sensitive. It will simply fall back to the + # database collation. 
+ 'exact': '= %s', + 'iexact': "= UPPER(%s)", + 'contains': "LIKE %s ESCAPE '\\' COLLATE " + self.collation, + 'icontains': "LIKE UPPER(%s) ESCAPE '\\' COLLATE "+ self.collation, + 'gt': '> %s', + 'gte': '>= %s', + 'lt': '< %s', + 'lte': '<= %s', + 'startswith': "LIKE %s ESCAPE '\\' COLLATE " + self.collation, + 'endswith': "LIKE %s ESCAPE '\\' COLLATE " + self.collation, + 'istartswith': "LIKE UPPER(%s) ESCAPE '\\' COLLATE " + self.collation, + 'iendswith': "LIKE UPPER(%s) ESCAPE '\\' COLLATE " + self.collation, + + # TODO: remove, keep native T-SQL LIKE wildcards support + # or use a "compatibility layer" and replace '*' with '%' + # and '.' with '_' + 'regex': 'LIKE %s COLLATE ' + self.collation, + 'iregex': 'LIKE %s COLLATE ' + self.collation, + + # TODO: freetext, full-text contains... + } + class CursorWrapper(object): """ From dc920bffe84c1e18617205ddfa5f0ec0d5eeb48f Mon Sep 17 00:00:00 2001 From: Tomas Ehrlich Date: Mon, 20 Aug 2012 14:19:43 +0200 Subject: [PATCH 2/2] Added client_encoding parameter. Since I'm not able to configure the FreeTDS/ODBC client to use utf-8, everything is encoded in latin-1. Thus, the decode('utf-8') call raises UnicodeDecodeError when it comes to non-ASCII characters. The client_encoding option allows me to specify the input encoding. 
--- sql_server/pyodbc/base.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/sql_server/pyodbc/base.py b/sql_server/pyodbc/base.py index 845a5b5b..4b91ff2a 100644 --- a/sql_server/pyodbc/base.py +++ b/sql_server/pyodbc/base.py @@ -188,6 +188,7 @@ def _cursor(self): cstr_parts.append('SERVERNAME=%s' % host_str) self.collation = options.get('collation', default_collation) + self.client_encoding = options.get('client_encoding', 'utf-8') if user_str: cstr_parts.append('UID=%s;PWD=%s' % (user_str, passwd_str)) @@ -246,7 +247,7 @@ def _cursor(self): if self.drv_name.startswith('LIBTDSODBC') and not self.connection.autocommit: self.connection.commit() - return CursorWrapper(cursor, self.driver_needs_utf8) + return CursorWrapper(cursor, self.driver_needs_utf8, self.client_encoding) # Collations: http://msdn2.microsoft.com/en-us/library/ms184391.aspx # http://msdn2.microsoft.com/en-us/library/ms179886.aspx @@ -288,9 +289,10 @@ class CursorWrapper(object): A wrapper around the pyodbc's cursor that takes in account a) some pyodbc DB-API 2.0 implementation and b) some common ODBC driver particularities. """ - def __init__(self, cursor, driver_needs_utf8): + def __init__(self, cursor, driver_needs_utf8, client_encoding): self.cursor = cursor self.driver_needs_utf8 = driver_needs_utf8 + self.client_encoding = client_encoding self.last_sql = '' self.last_params = () @@ -361,7 +363,7 @@ def format_row(self, row): (pyodbc Rows are not sliceable). """ - if not self.driver_needs_utf8: + if not self.driver_needs_utf8 and self.client_encoding == 'utf-8': return tuple(row) # FreeTDS (and other ODBC drivers?) doesn't support Unicode @@ -369,7 +371,7 @@ def format_row(self, row): out = [] for f in row: if isinstance(f, str): - out.append(f.decode('utf-8')) + out.append(f.decode(self.client_encoding)) else: out.append(f)