diff --git a/bigquery/.coveragerc b/bigquery/.coveragerc index a54b99aa14b7..d097511c3124 100644 --- a/bigquery/.coveragerc +++ b/bigquery/.coveragerc @@ -9,3 +9,5 @@ exclude_lines = pragma: NO COVER # Ignore debug-only repr def __repr__ + # Ignore abstract methods + raise NotImplementedError diff --git a/bigquery/README.rst b/bigquery/README.rst index 721ba18a27b2..01a1194c41fc 100644 --- a/bigquery/README.rst +++ b/bigquery/README.rst @@ -38,12 +38,27 @@ Using the API Querying massive datasets can be time consuming and expensive without the right hardware and infrastructure. Google `BigQuery`_ (`BigQuery API docs`_) -solves this problem by enabling super-fast, SQL-like queries against -append-only tables, using the processing power of Google's infrastructure. +solves this problem by enabling super-fast, SQL queries against +append-mostly tables, using the processing power of Google's infrastructure. .. _BigQuery: https://cloud.google.com/bigquery/what-is-bigquery .. _BigQuery API docs: https://cloud.google.com/bigquery/docs/reference/v2/ +Create a dataset +~~~~~~~~~~~~~~~~ + +.. code:: python + + from google.cloud import bigquery + from google.cloud.bigquery import Dataset + + client = bigquery.Client() + + dataset_ref = client.dataset('dataset_name') + dataset = Dataset(dataset_ref) + dataset.description = 'my dataset' + dataset = client.create_dataset(dataset) # API request + Load data from CSV ~~~~~~~~~~~~~~~~~~ @@ -52,39 +67,43 @@ Load data from CSV import csv from google.cloud import bigquery + from google.cloud.bigquery import LoadJobConfig from google.cloud.bigquery import SchemaField client = bigquery.Client() - dataset = client.dataset('dataset_name') - dataset.create() # API request - SCHEMA = [ SchemaField('full_name', 'STRING', mode='required'), SchemaField('age', 'INTEGER', mode='required'), ] - table = dataset.table('table_name', SCHEMA) - table.create() + table_ref = client.dataset('dataset_name').table('table_name') + + load_config = LoadJobConfig() + load_config.skip_leading_rows = 1 + load_config.schema = SCHEMA - with open('csv_file', 'rb') as readable: - table.upload_from_file( - readable, source_format='CSV', skip_leading_rows=1) + # Contents of csv_file.csv: + # Name,Age + # Tim,99 + with open('csv_file.csv', 'rb') as readable: + client.load_table_from_file( + readable, table_ref, job_config=load_config) # API request -Perform a synchronous query -~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Perform a query +~~~~~~~~~~~~~~~ .. code:: python - # Perform a synchronous query. + # Perform a query. 
QUERY = ( - 'SELECT name FROM [bigquery-public-data:usa_names.usa_1910_2013] ' - 'WHERE state = "TX"') - query = client.run_sync_query('%s LIMIT 100' % QUERY) - query.timeout_ms = TIMEOUT_MS - query.run() - - for row in query.rows: - print(row) + 'SELECT name FROM `bigquery-public-data.usa_names.usa_1910_2013` ' + 'WHERE state = "TX" ' + 'LIMIT 100') + query_job = client.query(QUERY) # API request + rows = query_job.result() # Waits for query to finish + + for row in rows: + print(row.name) See the ``google-cloud-python`` API `BigQuery documentation`_ to learn how diff --git a/bigquery/google/cloud/bigquery/__init__.py b/bigquery/google/cloud/bigquery/__init__.py index e2eb29e866a3..cda5236d3c60 100644 --- a/bigquery/google/cloud/bigquery/__init__.py +++ b/bigquery/google/cloud/bigquery/__init__.py @@ -26,17 +26,63 @@ from pkg_resources import get_distribution __version__ = get_distribution('google-cloud-bigquery').version -from google.cloud.bigquery._helpers import ArrayQueryParameter -from google.cloud.bigquery._helpers import ScalarQueryParameter -from google.cloud.bigquery._helpers import StructQueryParameter +from google.cloud.bigquery._helpers import DEFAULT_RETRY from google.cloud.bigquery.client import Client -from google.cloud.bigquery.dataset import AccessGrant +from google.cloud.bigquery.dataset import AccessEntry from google.cloud.bigquery.dataset import Dataset +from google.cloud.bigquery.dataset import DatasetReference +from google.cloud.bigquery.job import CopyJob +from google.cloud.bigquery.job import CopyJobConfig +from google.cloud.bigquery.job import ExtractJob +from google.cloud.bigquery.job import ExtractJobConfig +from google.cloud.bigquery.job import QueryJob +from google.cloud.bigquery.job import QueryJobConfig +from google.cloud.bigquery.job import LoadJob +from google.cloud.bigquery.job import LoadJobConfig +from google.cloud.bigquery.query import ArrayQueryParameter +from google.cloud.bigquery.query import ScalarQueryParameter +from google.cloud.bigquery.query import StructQueryParameter +from google.cloud.bigquery.query import UDFResource from google.cloud.bigquery.schema import SchemaField from google.cloud.bigquery.table import Table +from google.cloud.bigquery.table import TableReference +from google.cloud.bigquery.external_config import ExternalConfig +from google.cloud.bigquery.external_config import BigtableOptions +from google.cloud.bigquery.external_config import BigtableColumnFamily +from google.cloud.bigquery.external_config import BigtableColumn +from google.cloud.bigquery.external_config import CSVOptions +from google.cloud.bigquery.external_config import GoogleSheetsOptions __all__ = [ - '__version__', 'AccessGrant', 'ArrayQueryParameter', 'Client', - 'Dataset', 'ScalarQueryParameter', 'SchemaField', 'StructQueryParameter', + '__version__', + 'Client', + # Queries + 'QueryJob', + 'QueryJobConfig', + 'ArrayQueryParameter', + 'ScalarQueryParameter', + 'StructQueryParameter', + # Datasets + 'Dataset', + 'DatasetReference', + 'AccessEntry', + # Tables 'Table', + 'TableReference', + 'CopyJob', + 'CopyJobConfig', + 'ExtractJob', + 'ExtractJobConfig', + 'LoadJob', + 'LoadJobConfig', + # Shared helpers + 'SchemaField', + 'UDFResource', + 'ExternalConfig', + 'BigtableOptions', + 'BigtableColumnFamily', + 'BigtableColumn', + 'CSVOptions', + 'GoogleSheetsOptions', + 'DEFAULT_RETRY', ] diff --git a/bigquery/google/cloud/bigquery/_helpers.py b/bigquery/google/cloud/bigquery/_helpers.py index aaabcec2a408..8e321ee59866 100644 --- 
a/bigquery/google/cloud/bigquery/_helpers.py +++ b/bigquery/google/cloud/bigquery/_helpers.py @@ -15,10 +15,12 @@ """Shared helper functions for BigQuery API classes.""" import base64 -from collections import OrderedDict -import copy import datetime +import operator +import six + +from google.api_core import retry from google.cloud._helpers import UTC from google.cloud._helpers import _date_from_iso8601_date from google.cloud._helpers import _datetime_from_microseconds @@ -71,6 +73,39 @@ def _timestamp_from_json(value, field): return _datetime_from_microseconds(1e6 * float(value)) +def _timestamp_query_param_from_json(value, field): + """Coerce 'value' to a datetime, if set or not nullable. + + Args: + value (str): The timestamp. + field (.SchemaField): The field corresponding to the value. + + Returns: + Optional[datetime.datetime]: The parsed datetime object from + ``value`` if the ``field`` is not null (otherwise it is + :data:`None`). + """ + if _not_null(value, field): + # Canonical formats for timestamps in BigQuery are flexible. See: + # g.co/cloud/bigquery/docs/reference/standard-sql/data-types#timestamp-type + # The separator between the date and time can be 'T' or ' '. + value = value.replace(' ', 'T', 1) + # The UTC timezone may be formatted as Z or +00:00. + value = value.replace('Z', '') + value = value.replace('+00:00', '') + + if '.' in value: + # YYYY-MM-DDTHH:MM:SS.ffffff + return datetime.datetime.strptime( + value, _RFC3339_MICROS_NO_ZULU).replace(tzinfo=UTC) + else: + # YYYY-MM-DDTHH:MM:SS + return datetime.datetime.strptime( + value, _RFC3339_NO_FRACTION).replace(tzinfo=UTC) + else: + return None + + def _datetime_from_json(value, field): """Coerce 'value' to a datetime, if set or not nullable. @@ -139,8 +174,74 @@ def _record_from_json(value, field): 'RECORD': _record_from_json, } +_QUERY_PARAMS_FROM_JSON = dict(_CELLDATA_FROM_JSON) +_QUERY_PARAMS_FROM_JSON['TIMESTAMP'] = _timestamp_query_param_from_json + + +class Row(object): + """A BigQuery row. + + Values can be accessed by position (index), by key like a dict, + or as properties. -def _row_from_json(row, schema): + :type values: tuple + :param values: the row values + + :type field_to_index: dict + :param field_to_index: a mapping from schema field names to indexes + """ + + # Choose unusual field names to try to avoid conflict with schema fields. 
+ __slots__ = ('_xxx_values', '_xxx_field_to_index') + + def __init__(self, values, field_to_index): + self._xxx_values = values + self._xxx_field_to_index = field_to_index + + def values(self): + return self._xxx_values + + def __getattr__(self, name): + i = self._xxx_field_to_index.get(name) + if i is None: + raise AttributeError('no row field "%s"' % name) + return self._xxx_values[i] + + def __len__(self): + return len(self._xxx_values) + + def __getitem__(self, key): + if isinstance(key, six.string_types): + i = self._xxx_field_to_index.get(key) + if i is None: + raise KeyError('no row field "%s"' % key) + key = i + return self._xxx_values[key] + + def __eq__(self, other): + if not isinstance(other, Row): + return NotImplemented + return( + self._xxx_values == other._xxx_values and + self._xxx_field_to_index == other._xxx_field_to_index) + + def __ne__(self, other): + return not self == other + + def __repr__(self): + # sort field dict by value, for determinism + items = sorted(self._xxx_field_to_index.items(), + key=operator.itemgetter(1)) + f2i = '{' + ', '.join('%r: %d' % i for i in items) + '}' + return 'Row({}, {})'.format(self._xxx_values, f2i) + + +def _field_to_index_mapping(schema): + """Create a mapping from schema field name to index of field.""" + return {f.name: i for i, f in enumerate(schema)} + + +def _row_tuple_from_json(row, schema): """Convert JSON row data to row with appropriate types. Note: ``row['f']`` and ``schema`` are presumed to be of the same length. @@ -167,9 +268,11 @@ def _row_from_json(row, schema): return tuple(row_data) -def _rows_from_json(rows, schema): +def _rows_from_json(values, schema): """Convert JSON row data to rows with appropriate types.""" - return [_row_from_json(row, schema) for row in rows] + field_to_index = _field_to_index_mapping(schema) + return [Row(_row_tuple_from_json(r, schema), field_to_index) + for r in values] def _int_to_json(value): @@ -264,51 +367,64 @@ def _time_to_json(value): _SCALAR_VALUE_TO_JSON_PARAM['TIMESTAMP'] = _timestamp_to_json_parameter -class _ConfigurationProperty(object): +def _snake_to_camel_case(value): + """Convert snake case string to camel case.""" + words = value.split('_') + return words[0] + ''.join(map(str.capitalize, words[1:])) + + +class _ApiResourceProperty(object): """Base property implementation. - Values will be stored on a `_configuration` helper attribute of the + Values will be stored on a `_properties` helper attribute of the property's job instance. 
:type name: str :param name: name of the property + + :type resource_name: str + :param resource_name: name of the property in the resource dictionary """ - def __init__(self, name): + def __init__(self, name, resource_name): self.name = name - self._backing_name = '_%s' % (self.name,) + self.resource_name = resource_name def __get__(self, instance, owner): - """Descriptor protocal: accesstor""" + """Descriptor protocol: accessor""" if instance is None: return self - return getattr(instance._configuration, self._backing_name) + return instance._properties.get(self.resource_name) def _validate(self, value): """Subclasses override to impose validation policy.""" pass def __set__(self, instance, value): - """Descriptor protocal: mutator""" + """Descriptor protocol: mutator""" self._validate(value) - setattr(instance._configuration, self._backing_name, value) + instance._properties[self.resource_name] = value def __delete__(self, instance): - """Descriptor protocal: deleter""" - delattr(instance._configuration, self._backing_name) + """Descriptor protocol: deleter""" + del instance._properties[self.resource_name] -class _TypedProperty(_ConfigurationProperty): +class _TypedApiResourceProperty(_ApiResourceProperty): """Property implementation: validates based on value type. :type name: str :param name: name of the property + :type resource_name: str + :param resource_name: name of the property in the resource dictionary + :type property_type: type or sequence of types :param property_type: type to be validated """ - def __init__(self, name, property_type): - super(_TypedProperty, self).__init__(name) + def __init__(self, name, resource_name, property_type): + super(_TypedApiResourceProperty, self).__init__( + name, resource_name) self.property_type = property_type def _validate(self, value): @@ -316,497 +432,59 @@ def _validate(self, value): :raises: ValueError on a type mismatch. """ + if value is None: + return if not isinstance(value, self.property_type): raise ValueError('Required type: %s' % (self.property_type,)) -class _EnumProperty(_ConfigurationProperty): - """Pseudo-enumeration class. +class _ListApiResourceProperty(_ApiResourceProperty): + """Property implementation: validates based on value type. :type name: str - :param name: name of the property. - """ - - -class UDFResource(object): - """Describe a single user-defined function (UDF) resource. - - :type udf_type: str - :param udf_type: the type of the resource ('inlineCode' or 'resourceUri') + :param name: name of the property - :type value: str - :param value: the inline code or resource URI. 
+ :type resource_name: str + :param resource_name: name of the property in the resource dictionary - See - https://cloud.google.com/bigquery/user-defined-functions#api + :type property_type: type or sequence of types + :param property_type: type to be validated """ - def __init__(self, udf_type, value): - self.udf_type = udf_type - self.value = value - - def __eq__(self, other): - if not isinstance(other, UDFResource): - return NotImplemented - return( - self.udf_type == other.udf_type and - self.value == other.value) - - def __ne__(self, other): - return not self == other - - -class UDFResourcesProperty(object): - """Custom property type, holding :class:`UDFResource` instances.""" + def __init__(self, name, resource_name, property_type): + super(_ListApiResourceProperty, self).__init__( + name, resource_name) + self.property_type = property_type def __get__(self, instance, owner): """Descriptor protocol: accessor""" if instance is None: return self - return list(instance._udf_resources) - - def __set__(self, instance, value): - """Descriptor protocol: mutator""" - if not all(isinstance(u, UDFResource) for u in value): - raise ValueError("udf items must be UDFResource") - instance._udf_resources = tuple(value) - - -class AbstractQueryParameter(object): - """Base class for named / positional query parameters. - """ - @classmethod - def from_api_repr(cls, resource): - """Factory: construct parameter from JSON resource. - - :type resource: dict - :param resource: JSON mapping of parameter - - :rtype: :class:`ScalarQueryParameter` - """ - raise NotImplementedError - - def to_api_repr(self): - """Construct JSON API representation for the parameter. - - :rtype: dict - """ - raise NotImplementedError - - -class ScalarQueryParameter(AbstractQueryParameter): - """Named / positional query parameters for scalar values. - - :type name: str or None - :param name: Parameter name, used via ``@foo`` syntax. If None, the - parameter can only be addressed via position (``?``). - - :type type_: str - :param type_: name of parameter type. One of 'STRING', 'INT64', - 'FLOAT64', 'BOOL', 'TIMESTAMP', 'DATETIME', or 'DATE'. - - :type value: str, int, float, bool, :class:`datetime.datetime`, or - :class:`datetime.date`. - :param value: the scalar parameter value. - """ - def __init__(self, name, type_, value): - self.name = name - self.type_ = type_ - self.value = value - - @classmethod - def positional(cls, type_, value): - """Factory for positional paramater. - - :type type_: str - :param type_: - name of parameter type. One of 'STRING', 'INT64', - 'FLOAT64', 'BOOL', 'TIMESTAMP', 'DATETIME', or 'DATE'. - - :type value: str, int, float, bool, :class:`datetime.datetime`, or - :class:`datetime.date`. - :param value: the scalar parameter value. - - :rtype: :class:`ScalarQueryParameter` - :returns: instance without name - """ - return cls(None, type_, value) - - @classmethod - def from_api_repr(cls, resource): - """Factory: construct parameter from JSON resource. + return instance._properties.get(self.resource_name, []) - :type resource: dict - :param resource: JSON mapping of parameter - - :rtype: :class:`ScalarQueryParameter` - :returns: instance - """ - name = resource.get('name') - type_ = resource['parameterType']['type'] - value = resource['parameterValue']['value'] - converted = _CELLDATA_FROM_JSON[type_](value, None) - return cls(name, type_, converted) - - def to_api_repr(self): - """Construct JSON API representation for the parameter. 
- - :rtype: dict - :returns: JSON mapping - """ - value = self.value - converter = _SCALAR_VALUE_TO_JSON_PARAM.get(self.type_) - if converter is not None: - value = converter(value) - resource = { - 'parameterType': { - 'type': self.type_, - }, - 'parameterValue': { - 'value': value, - }, - } - if self.name is not None: - resource['name'] = self.name - return resource - - def _key(self): - """A tuple key that uniquely describes this field. - - Used to compute this instance's hashcode and evaluate equality. - - Returns: - tuple: The contents of this :class:`ScalarQueryParameter`. - """ - return ( - self.name, - self.type_.upper(), - self.value, - ) - - def __eq__(self, other): - if not isinstance(other, ScalarQueryParameter): - return NotImplemented - return self._key() == other._key() - - def __ne__(self, other): - return not self == other - - def __repr__(self): - return 'ScalarQueryParameter{}'.format(self._key()) - - -class ArrayQueryParameter(AbstractQueryParameter): - """Named / positional query parameters for array values. - - :type name: str or None - :param name: Parameter name, used via ``@foo`` syntax. If None, the - parameter can only be addressed via position (``?``). - - :type array_type: str - :param array_type: - name of type of array elements. One of `'STRING'`, `'INT64'`, - `'FLOAT64'`, `'BOOL'`, `'TIMESTAMP'`, or `'DATE'`. - - :type values: list of appropriate scalar type. - :param values: the parameter array values. - """ - def __init__(self, name, array_type, values): - self.name = name - self.array_type = array_type - self.values = values - - @classmethod - def positional(cls, array_type, values): - """Factory for positional parameters. - - :type array_type: str - :param array_type: - name of type of array elements. One of `'STRING'`, `'INT64'`, - `'FLOAT64'`, `'BOOL'`, `'TIMESTAMP'`, or `'DATE'`. - - :type values: list of appropriate scalar type - :param values: the parameter array values. - - :rtype: :class:`ArrayQueryParameter` - :returns: instance without name - """ - return cls(None, array_type, values) - - @classmethod - def _from_api_repr_struct(cls, resource): - name = resource.get('name') - converted = [] - # We need to flatten the array to use the StructQueryParameter - # parse code. - resource_template = { - # The arrayType includes all the types of the fields of the STRUCT - 'parameterType': resource['parameterType']['arrayType'] - } - for array_value in resource['parameterValue']['arrayValues']: - struct_resource = copy.deepcopy(resource_template) - struct_resource['parameterValue'] = array_value - struct_value = StructQueryParameter.from_api_repr(struct_resource) - converted.append(struct_value) - return cls(name, 'STRUCT', converted) - - @classmethod - def _from_api_repr_scalar(cls, resource): - name = resource.get('name') - array_type = resource['parameterType']['arrayType']['type'] - values = [ - value['value'] - for value - in resource['parameterValue']['arrayValues']] - converted = [ - _CELLDATA_FROM_JSON[array_type](value, None) for value in values] - return cls(name, array_type, converted) - - @classmethod - def from_api_repr(cls, resource): - """Factory: construct parameter from JSON resource. 
- - :type resource: dict - :param resource: JSON mapping of parameter - - :rtype: :class:`ArrayQueryParameter` - :returns: instance - """ - array_type = resource['parameterType']['arrayType']['type'] - if array_type == 'STRUCT': - return cls._from_api_repr_struct(resource) - return cls._from_api_repr_scalar(resource) - - def to_api_repr(self): - """Construct JSON API representation for the parameter. + def _validate(self, value): + """Ensure that 'value' is of the appropriate type. - :rtype: dict - :returns: JSON mapping - """ - values = self.values - if self.array_type == 'RECORD' or self.array_type == 'STRUCT': - reprs = [value.to_api_repr() for value in values] - a_type = reprs[0]['parameterType'] - a_values = [repr_['parameterValue'] for repr_ in reprs] - else: - a_type = {'type': self.array_type} - converter = _SCALAR_VALUE_TO_JSON_PARAM.get(self.array_type) - if converter is not None: - values = [converter(value) for value in values] - a_values = [{'value': value} for value in values] - resource = { - 'parameterType': { - 'type': 'ARRAY', - 'arrayType': a_type, - }, - 'parameterValue': { - 'arrayValues': a_values, - }, - } - if self.name is not None: - resource['name'] = self.name - return resource - - def _key(self): - """A tuple key that uniquely describes this field. - - Used to compute this instance's hashcode and evaluate equality. - - Returns: - tuple: The contents of this :class:`ArrayQueryParameter`. + :raises: ValueError on a type mismatch. """ - return ( - self.name, - self.array_type.upper(), - self.values, - ) - - def __eq__(self, other): - if not isinstance(other, ArrayQueryParameter): - return NotImplemented - return self._key() == other._key() - - def __ne__(self, other): - return not self == other - - def __repr__(self): - return 'ArrayQueryParameter{}'.format(self._key()) + if value is None: + raise ValueError(( + 'Required type: list of {}. ' + 'To unset, use del or set to empty list').format( + self.property_type,)) + if not all(isinstance(item, self.property_type) for item in value): + raise ValueError( + 'Required type: list of %s' % (self.property_type,)) -class StructQueryParameter(AbstractQueryParameter): - """Named / positional query parameters for struct values. +class _EnumApiResourceProperty(_ApiResourceProperty): + """Pseudo-enumeration class. - :type name: str or None - :param name: Parameter name, used via ``@foo`` syntax. If None, the - parameter can only be addressed via position (``?``). + :type name: str + :param name: name of the property. - :type sub_params: tuple of :class:`ScalarQueryParameter` - :param sub_params: the sub-parameters for the struct + :type resource_name: str + :param resource_name: name of the property in the resource dictionary """ - def __init__(self, name, *sub_params): - self.name = name - types = self.struct_types = OrderedDict() - values = self.struct_values = {} - for sub in sub_params: - if isinstance(sub, self.__class__): - types[sub.name] = 'STRUCT' - values[sub.name] = sub - elif isinstance(sub, ArrayQueryParameter): - types[sub.name] = 'ARRAY' - values[sub.name] = sub - else: - types[sub.name] = sub.type_ - values[sub.name] = sub.value - - @classmethod - def positional(cls, *sub_params): - """Factory for positional parameters. 
- - :type sub_params: tuple of :class:`ScalarQueryParameter` - :param sub_params: the sub-parameters for the struct - - :rtype: :class:`StructQueryParameter` - :returns: instance without name - """ - return cls(None, *sub_params) - - @classmethod - def from_api_repr(cls, resource): - """Factory: construct parameter from JSON resource. - - :type resource: dict - :param resource: JSON mapping of parameter - - :rtype: :class:`StructQueryParameter` - :returns: instance - """ - name = resource.get('name') - instance = cls(name) - type_resources = {} - types = instance.struct_types - for item in resource['parameterType']['structTypes']: - types[item['name']] = item['type']['type'] - type_resources[item['name']] = item['type'] - struct_values = resource['parameterValue']['structValues'] - for key, value in struct_values.items(): - type_ = types[key] - converted = None - if type_ == 'STRUCT': - struct_resource = { - 'name': key, - 'parameterType': type_resources[key], - 'parameterValue': value, - } - converted = StructQueryParameter.from_api_repr(struct_resource) - elif type_ == 'ARRAY': - struct_resource = { - 'name': key, - 'parameterType': type_resources[key], - 'parameterValue': value, - } - converted = ArrayQueryParameter.from_api_repr(struct_resource) - else: - value = value['value'] - converted = _CELLDATA_FROM_JSON[type_](value, None) - instance.struct_values[key] = converted - return instance - - def to_api_repr(self): - """Construct JSON API representation for the parameter. - - :rtype: dict - :returns: JSON mapping - """ - s_types = {} - values = {} - for name, value in self.struct_values.items(): - type_ = self.struct_types[name] - if type_ in ('STRUCT', 'ARRAY'): - repr_ = value.to_api_repr() - s_types[name] = {'name': name, 'type': repr_['parameterType']} - values[name] = repr_['parameterValue'] - else: - s_types[name] = {'name': name, 'type': {'type': type_}} - converter = _SCALAR_VALUE_TO_JSON_PARAM.get(type_) - if converter is not None: - value = converter(value) - values[name] = {'value': value} - - resource = { - 'parameterType': { - 'type': 'STRUCT', - 'structTypes': [s_types[key] for key in self.struct_types], - }, - 'parameterValue': { - 'structValues': values, - }, - } - if self.name is not None: - resource['name'] = self.name - return resource - - def _key(self): - """A tuple key that uniquely describes this field. - - Used to compute this instance's hashcode and evaluate equality. - - Returns: - tuple: The contents of this :class:`ArrayQueryParameter`. - """ - return ( - self.name, - self.struct_types, - self.struct_values, - ) - - def __eq__(self, other): - if not isinstance(other, StructQueryParameter): - return NotImplemented - return self._key() == other._key() - - def __ne__(self, other): - return not self == other - - def __repr__(self): - return 'StructQueryParameter{}'.format(self._key()) - - -class QueryParametersProperty(object): - """Custom property type, holding query parameter instances.""" - - def __get__(self, instance, owner): - """Descriptor protocol: accessor - - :type instance: :class:`QueryParametersProperty` - :param instance: instance owning the property (None if accessed via - the class). - - :type owner: type - :param owner: the class owning the property. - - :rtype: list of instances of classes derived from - :class:`AbstractQueryParameter`. - :returns: the descriptor, if accessed via the class, or the instance's - query parameters. 
- """ - if instance is None: - return self - return list(instance._query_parameters) - - def __set__(self, instance, value): - """Descriptor protocol: mutator - - :type instance: :class:`QueryParametersProperty` - :param instance: instance owning the property (None if accessed via - the class). - - :type value: list of instances of classes derived from - :class:`AbstractQueryParameter`. - :param value: new query parameters for the instance. - """ - if not all(isinstance(u, AbstractQueryParameter) for u in value): - raise ValueError( - "query parameters must be derived from AbstractQueryParameter") - instance._query_parameters = tuple(value) def _item_to_row(iterator, resource): @@ -824,10 +502,11 @@ def _item_to_row(iterator, resource): :type resource: dict :param resource: An item to be converted to a row. - :rtype: tuple + :rtype: :class:`~google.cloud.bigquery.Row` :returns: The next row in the page. """ - return _row_from_json(resource, iterator.schema) + return Row(_row_tuple_from_json(resource, iterator.schema), + iterator._field_to_index) # pylint: disable=unused-argument @@ -848,3 +527,36 @@ def _rows_page_start(iterator, page, response): total_rows = int(total_rows) iterator.total_rows = total_rows # pylint: enable=unused-argument + + +def _should_retry(exc): + """Predicate for determining when to retry. + + We retry if and only if the 'reason' is 'backendError' + or 'rateLimitExceeded'. + """ + if not hasattr(exc, 'errors'): + return False + if len(exc.errors) == 0: + return False + reason = exc.errors[0]['reason'] + return reason == 'backendError' or reason == 'rateLimitExceeded' + + +DEFAULT_RETRY = retry.Retry(predicate=_should_retry) +"""The default retry object. + +Any method with a ``retry`` parameter will be retried automatically, +with reasonable defaults. To disable retry, pass ``retry=None``. +To modify the default retry behavior, call a ``with_XXX`` method +on ``DEFAULT_RETRY``. For example, to change the deadline to 30 seconds, +pass ``retry=bigquery.DEFAULT_RETRY.with_deadline(30)``. 
+""" + + +def _int_or_none(value): + """Helper: deserialize int value from JSON string.""" + if isinstance(value, int): + return value + if value is not None: + return int(value) diff --git a/bigquery/google/cloud/bigquery/client.py b/bigquery/google/cloud/bigquery/client.py index 6d77649dcaa5..1062dc279725 100644 --- a/bigquery/google/cloud/bigquery/client.py +++ b/bigquery/google/cloud/bigquery/client.py @@ -14,15 +14,54 @@ """Client for interacting with the Google BigQuery API.""" +from __future__ import absolute_import + +import collections +import functools +import os +import uuid + +import six + +from google import resumable_media +from google.resumable_media.requests import MultipartUpload +from google.resumable_media.requests import ResumableUpload + from google.api_core import page_iterator + +from google.cloud import exceptions from google.cloud.client import ClientWithProject from google.cloud.bigquery._http import Connection from google.cloud.bigquery.dataset import Dataset +from google.cloud.bigquery.dataset import DatasetReference +from google.cloud.bigquery.table import Table, _TABLE_HAS_NO_SCHEMA +from google.cloud.bigquery.table import TableReference +from google.cloud.bigquery.table import _row_from_mapping from google.cloud.bigquery.job import CopyJob -from google.cloud.bigquery.job import ExtractTableToStorageJob -from google.cloud.bigquery.job import LoadTableFromStorageJob -from google.cloud.bigquery.job import QueryJob +from google.cloud.bigquery.job import ExtractJob +from google.cloud.bigquery.job import LoadJob +from google.cloud.bigquery.job import QueryJob, QueryJobConfig from google.cloud.bigquery.query import QueryResults +from google.cloud.bigquery._helpers import _item_to_row +from google.cloud.bigquery._helpers import _rows_page_start +from google.cloud.bigquery._helpers import _field_to_index_mapping +from google.cloud.bigquery._helpers import _SCALAR_VALUE_TO_JSON_ROW +from google.cloud.bigquery._helpers import DEFAULT_RETRY +from google.cloud.bigquery._helpers import _snake_to_camel_case + + +_DEFAULT_CHUNKSIZE = 1048576 # 1024 * 1024 B = 1 MB +_MAX_MULTIPART_SIZE = 5 * 1024 * 1024 +_DEFAULT_NUM_RETRIES = 6 +_BASE_UPLOAD_TEMPLATE = ( + u'https://www.googleapis.com/upload/bigquery/v2/projects/' + u'{project}/jobs?uploadType=') +_MULTIPART_URL_TEMPLATE = _BASE_UPLOAD_TEMPLATE + u'multipart' +_RESUMABLE_URL_TEMPLATE = _BASE_UPLOAD_TEMPLATE + u'resumable' +_GENERIC_CONTENT_TYPE = u'*/*' +_READ_LESS_THAN_SIZE = ( + 'Size {:d} was specified but the file-like object only had ' + '{:d} bytes remaining.') class Project(object): @@ -82,7 +121,8 @@ def __init__(self, project=None, credentials=None, _http=None): project=project, credentials=credentials, _http=_http) self._connection = Connection(self) - def list_projects(self, max_results=None, page_token=None): + def list_projects(self, max_results=None, page_token=None, + retry=DEFAULT_RETRY): """List projects for the project associated with this client. See @@ -97,21 +137,24 @@ def list_projects(self, max_results=None, page_token=None): not passed, the API will return the first page of projects. + :type retry: :class:`google.api_core.retry.Retry` + :param retry: (Optional) How to retry the RPC. + :rtype: :class:`~google.api_core.page_iterator.Iterator` :returns: Iterator of :class:`~google.cloud.bigquery.client.Project` accessible to the current client. 
""" return page_iterator.HTTPIterator( client=self, - api_request=self._connection.api_request, + api_request=functools.partial(self._call_api, retry), path='/projects', item_to_value=_item_to_project, items_key='projects', page_token=page_token, max_results=max_results) - def list_datasets(self, include_all=False, max_results=None, - page_token=None): + def list_datasets(self, include_all=False, filter=None, max_results=None, + page_token=None, retry=DEFAULT_RETRY): """List datasets for the project associated with this client. See @@ -120,6 +163,11 @@ def list_datasets(self, include_all=False, max_results=None, :type include_all: bool :param include_all: True if results include hidden datasets. + :type filter: str + :param filter: an expression for filtering the results by label. + For syntax, see + https://cloud.google.com/bigquery/docs/reference/rest/v2/datasets/list#filter. + :type max_results: int :param max_results: maximum number of datasets to return, If not passed, defaults to a value set by the API. @@ -129,17 +177,24 @@ def list_datasets(self, include_all=False, max_results=None, not passed, the API will return the first page of datasets. + :type retry: :class:`google.api_core.retry.Retry` + :param retry: (Optional) How to retry the RPC. + :rtype: :class:`~google.api_core.page_iterator.Iterator` - :returns: Iterator of :class:`~google.cloud.bigquery.dataset.Dataset`. + :returns: Iterator of :class:`~google.cloud.bigquery.Dataset`. accessible to the current client. """ extra_params = {} if include_all: extra_params['all'] = True + if filter: + # TODO: consider supporting a dict of label -> value for filter, + # and converting it into a string here. + extra_params['filter'] = filter path = '/projects/%s/datasets' % (self.project,) return page_iterator.HTTPIterator( client=self, - api_request=self._connection.api_request, + api_request=functools.partial(self._call_api, retry), path=path, item_to_value=_item_to_dataset, items_key='datasets', @@ -147,27 +202,269 @@ def list_datasets(self, include_all=False, max_results=None, max_results=max_results, extra_params=extra_params) - def dataset(self, dataset_name, project=None): - """Construct a dataset bound to this client. + def dataset(self, dataset_id, project=None): + """Construct a reference to a dataset. - :type dataset_name: str - :param dataset_name: Name of the dataset. + :type dataset_id: str + :param dataset_id: ID of the dataset. :type project: str :param project: (Optional) project ID for the dataset (defaults to the project of the client). - :rtype: :class:`google.cloud.bigquery.dataset.Dataset` - :returns: a new ``Dataset`` instance + :rtype: :class:`google.cloud.bigquery.DatasetReference` + :returns: a new ``DatasetReference`` instance + """ + if project is None: + project = self.project + + return DatasetReference(project, dataset_id) + + def create_dataset(self, dataset): + """API call: create the dataset via a PUT request. + + See + https://cloud.google.com/bigquery/docs/reference/rest/v2/tables/insert + + :type dataset: :class:`~google.cloud.bigquery.Dataset` + :param dataset: A ``Dataset`` populated with the desired initial state. + If project is missing, it defaults to the project of + the client. + + :rtype: ":class:`~google.cloud.bigquery.Dataset`" + :returns: a new ``Dataset`` returned from the service. 
+ """ + path = '/projects/%s/datasets' % (dataset.project,) + api_response = self._connection.api_request( + method='POST', path=path, data=dataset._build_resource()) + return Dataset.from_api_repr(api_response) + + def create_table(self, table): + """API call: create a table via a PUT request + + See + https://cloud.google.com/bigquery/docs/reference/rest/v2/tables/insert + + :type table: :class:`~google.cloud.bigquery.Table` + :param table: A ``Table`` populated with the desired initial state. + + :rtype: ":class:`~google.cloud.bigquery.Table`" + :returns: a new ``Table`` returned from the service. + """ + path = '/projects/%s/datasets/%s/tables' % ( + table.project, table.dataset_id) + resource = table._build_resource(Table.all_fields) + doomed = [field for field in resource if resource[field] is None] + for field in doomed: + del resource[field] + api_response = self._connection.api_request( + method='POST', path=path, data=resource) + return Table.from_api_repr(api_response) + + def _call_api(self, retry, **kwargs): + call = functools.partial(self._connection.api_request, **kwargs) + if retry: + call = retry(call) + return call() + + def get_dataset(self, dataset_ref, retry=DEFAULT_RETRY): + """Fetch the dataset referenced by ``dataset_ref`` + + :type dataset_ref: + :class:`google.cloud.bigquery.DatasetReference` + :param dataset_ref: the dataset to use. + + :type retry: :class:`google.api_core.retry.Retry` + :param retry: (Optional) How to retry the RPC. + + :rtype: :class:`google.cloud.bigquery.Dataset` + :returns: a ``Dataset`` instance + """ + api_response = self._call_api(retry, + method='GET', + path=dataset_ref.path) + return Dataset.from_api_repr(api_response) + + def get_table(self, table_ref, retry=DEFAULT_RETRY): + """Fetch the table referenced by ``table_ref`` + + :type table_ref: + :class:`google.cloud.bigquery.TableReference` + :param table_ref: the table to use. + + :type retry: :class:`google.api_core.retry.Retry` + :param retry: (Optional) How to retry the RPC. + + :rtype: :class:`google.cloud.bigquery.Table` + :returns: a ``Table`` instance + """ + api_response = self._call_api(retry, method='GET', path=table_ref.path) + return Table.from_api_repr(api_response) + + def update_dataset(self, dataset, fields, retry=DEFAULT_RETRY): + """Change some fields of a dataset. + + Use ``fields`` to specify which fields to update. At least one field + must be provided. If a field is listed in ``fields`` and is ``None`` in + ``dataset``, it will be deleted. + + If ``dataset.etag`` is not ``None``, the update will only + succeed if the dataset on the server has the same ETag. Thus + reading a dataset with ``get_dataset``, changing its fields, + and then passing it ``update_dataset`` will ensure that the changes + will only be saved if no modifications to the dataset occurred + since the read. + + :type dataset: :class:`google.cloud.bigquery.Dataset` + :param dataset: the dataset to update. + + :type fields: sequence of string + :param fields: the fields of ``dataset`` to change, spelled as the + Dataset properties (e.g. "friendly_name"). + + :type retry: :class:`google.api_core.retry.Retry` + :param retry: (Optional) How to retry the RPC. 
+ + :rtype: :class:`google.cloud.bigquery.Dataset` + :returns: the modified ``Dataset`` instance + """ + path = '/projects/%s/datasets/%s' % (dataset.project, + dataset.dataset_id) + partial = {} + for f in fields: + if not hasattr(dataset, f): + raise ValueError('No Dataset field %s' % f) + # All dataset attributes are trivially convertible to JSON except + # for access entries. + if f == 'access_entries': + attr = dataset._build_access_resource() + api_field = 'access' + else: + attr = getattr(dataset, f) + api_field = _snake_to_camel_case(f) + partial[api_field] = attr + if dataset.etag is not None: + headers = {'If-Match': dataset.etag} + else: + headers = None + api_response = self._call_api( + retry, method='PATCH', path=path, data=partial, headers=headers) + return Dataset.from_api_repr(api_response) + + def update_table(self, table, properties, retry=DEFAULT_RETRY): + """API call: update table properties via a PUT request + + See + https://cloud.google.com/bigquery/docs/reference/rest/v2/tables/update + + :type table: + :class:`google.cloud.bigquery.Table` + :param table_ref: the table to update. + + :type retry: :class:`google.api_core.retry.Retry` + :param retry: (Optional) How to retry the RPC. + + :rtype: :class:`google.cloud.bigquery.Table` + :returns: a ``Table`` instance + """ + partial = table._build_resource(properties) + if table.etag is not None: + headers = {'If-Match': table.etag} + else: + headers = None + api_response = self._call_api( + retry, + method='PATCH', path=table.path, data=partial, headers=headers) + return Table.from_api_repr(api_response) + + def list_dataset_tables(self, dataset, max_results=None, page_token=None, + retry=DEFAULT_RETRY): + """List tables in the dataset. + + See + https://cloud.google.com/bigquery/docs/reference/rest/v2/tables/list + + :type dataset: One of: + :class:`~google.cloud.bigquery.Dataset` + :class:`~google.cloud.bigquery.DatasetReference` + :param dataset: the dataset whose tables to list, or a reference to it. + + :type max_results: int + :param max_results: (Optional) Maximum number of tables to return. + If not passed, defaults to a value set by the API. + + :type page_token: str + :param page_token: (Optional) Opaque marker for the next "page" of + datasets. If not passed, the API will return the + first page of datasets. + + :type retry: :class:`google.api_core.retry.Retry` + :param retry: (Optional) How to retry the RPC. + + :rtype: :class:`~google.api_core.page_iterator.Iterator` + :returns: Iterator of :class:`~google.cloud.bigquery.Table` + contained within the current dataset. + """ + if not isinstance(dataset, (Dataset, DatasetReference)): + raise TypeError('dataset must be a Dataset or a DatasetReference') + path = '%s/tables' % dataset.path + result = page_iterator.HTTPIterator( + client=self, + api_request=functools.partial(self._call_api, retry), + path=path, + item_to_value=_item_to_table, + items_key='tables', + page_token=page_token, + max_results=max_results) + result.dataset = dataset + return result + + def delete_dataset(self, dataset, retry=DEFAULT_RETRY): + """Delete a dataset. + + See + https://cloud.google.com/bigquery/docs/reference/rest/v2/datasets/delete + + :type dataset: One of: + :class:`~google.cloud.bigquery.Dataset` + :class:`~google.cloud.bigquery.DatasetReference` + + :type retry: :class:`google.api_core.retry.Retry` + :param retry: (Optional) How to retry the RPC. + + :param dataset: the dataset to delete, or a reference to it. 
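+
+        Example (sketch; deletes the hypothetical dataset ``'temp_dataset'``,
+        which is assumed to be empty)::
+
+            client.delete_dataset(client.dataset('temp_dataset'))  # API request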
+ """ + if not isinstance(dataset, (Dataset, DatasetReference)): + raise TypeError('dataset must be a Dataset or a DatasetReference') + self._call_api(retry, method='DELETE', path=dataset.path) + + def delete_table(self, table, retry=DEFAULT_RETRY): + """Delete a table + + See + https://cloud.google.com/bigquery/docs/reference/rest/v2/tables/delete + + :type table: One of: + :class:`~google.cloud.bigquery.Table` + :class:`~google.cloud.bigquery.TableReference` + :param table: the table to delete, or a reference to it. + + :type retry: :class:`google.api_core.retry.Retry` + :param retry: (Optional) How to retry the RPC. """ - return Dataset(dataset_name, client=self, project=project) + if not isinstance(table, (Table, TableReference)): + raise TypeError('table must be a Table or a TableReference') + self._call_api(retry, method='DELETE', path=table.path) - def _get_query_results(self, job_id, project=None, timeout_ms=None): + def _get_query_results(self, job_id, retry, project=None, timeout_ms=None): """Get the query results object for a query job. :type job_id: str :param job_id: Name of the query job. + :type retry: :class:`google.api_core.retry.Retry` + :param retry: (Optional) How to retry the RPC. + :type project: str :param project: (Optional) project ID for the query job (defaults to the project of @@ -178,7 +475,7 @@ def _get_query_results(self, job_id, project=None, timeout_ms=None): (Optional) number of milliseconds the the API call should wait for the query to complete before the request times out. - :rtype: :class:`google.cloud.bigquery.query.QueryResults` + :rtype: :class:`google.cloud.bigquery.QueryResults` :returns: a new ``QueryResults`` instance """ @@ -192,10 +489,12 @@ def _get_query_results(self, job_id, project=None, timeout_ms=None): path = '/projects/{}/queries/{}'.format(project, job_id) - resource = self._connection.api_request( - method='GET', path=path, query_params=extra_params) - - return QueryResults.from_api_repr(resource, self) + # This call is typically made in a polling loop that checks whether the + # job is complete (from QueryJob.done(), called ultimately from + # QueryJob.result()). So we don't need to poll here. + resource = self._call_api( + retry, method='GET', path=path, query_params=extra_params) + return QueryResults.from_api_repr(resource) def job_from_resource(self, resource): """Detect correct job type from resource and instantiate. 
@@ -204,26 +503,61 @@ def job_from_resource(self, resource): :param resource: one job resource from API response :rtype: One of: - :class:`google.cloud.bigquery.job.LoadTableFromStorageJob`, - :class:`google.cloud.bigquery.job.CopyJob`, - :class:`google.cloud.bigquery.job.ExtractTableToStorageJob`, - :class:`google.cloud.bigquery.job.QueryJob`, - :class:`google.cloud.bigquery.job.RunSyncQueryJob` + :class:`google.cloud.bigquery.LoadJob`, + :class:`google.cloud.bigquery.CopyJob`, + :class:`google.cloud.bigquery.ExtractJob`, + :class:`google.cloud.bigquery.QueryJob` :returns: the job instance, constructed via the resource """ config = resource['configuration'] if 'load' in config: - return LoadTableFromStorageJob.from_api_repr(resource, self) + return LoadJob.from_api_repr(resource, self) elif 'copy' in config: return CopyJob.from_api_repr(resource, self) elif 'extract' in config: - return ExtractTableToStorageJob.from_api_repr(resource, self) + return ExtractJob.from_api_repr(resource, self) elif 'query' in config: return QueryJob.from_api_repr(resource, self) raise ValueError('Cannot parse job resource') + def get_job(self, job_id, project=None, retry=DEFAULT_RETRY): + """Fetch a job for the project associated with this client. + + See + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/get + + :type job_id: str + :param job_id: Name of the job. + + :type project: str + :param project: + project ID owning the job (defaults to the client's project) + + :type retry: :class:`google.api_core.retry.Retry` + :param retry: (Optional) How to retry the RPC. + + :rtype: One of: + :class:`google.cloud.bigquery.LoadJob`, + :class:`google.cloud.bigquery.CopyJob`, + :class:`google.cloud.bigquery.ExtractJob`, + :class:`google.cloud.bigquery.QueryJob` + :returns: + Concrete job instance, based on the resource returned by the API. + """ + extra_params = {'projection': 'full'} + + if project is None: + project = self.project + + path = '/projects/{}/jobs/{}'.format(project, job_id) + + resource = self._call_api( + retry, method='GET', path=path, query_params=extra_params) + + return self.job_from_resource(resource) + def list_jobs(self, max_results=None, page_token=None, all_users=None, - state_filter=None): + state_filter=None, retry=DEFAULT_RETRY): """List jobs for the project associated with this client. See @@ -250,6 +584,9 @@ def list_jobs(self, max_results=None, page_token=None, all_users=None, * ``"pending"`` * ``"running"`` + :type retry: :class:`google.api_core.retry.Retry` + :param retry: (Optional) How to retry the RPC. + :rtype: :class:`~google.api_core.page_iterator.Iterator` :returns: Iterable of job instances. """ @@ -264,7 +601,7 @@ def list_jobs(self, max_results=None, page_token=None, all_users=None, path = '/projects/%s/jobs' % (self.project,) return page_iterator.HTTPIterator( client=self, - api_request=self._connection.api_request, + api_request=functools.partial(self._call_api, retry), path=path, item_to_value=_item_to_job, items_key='jobs', @@ -272,125 +609,640 @@ def list_jobs(self, max_results=None, page_token=None, all_users=None, max_results=max_results, extra_params=extra_params) - def load_table_from_storage(self, job_name, destination, *source_uris): - """Construct a job for loading data into a table from CloudStorage. + def load_table_from_uri(self, source_uris, destination, + job_id=None, job_id_prefix=None, + job_config=None, retry=DEFAULT_RETRY): + """Starts a job for loading data into a table from CloudStorage. 
See https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load - :type job_name: str - :param job_name: Name of the job. + :type source_uris: One of: + str + sequence of string + :param source_uris: URIs of data files to be loaded; in format + ``gs:///``. - :type destination: :class:`google.cloud.bigquery.table.Table` + :type destination: :class:`google.cloud.bigquery.table.TableReference` :param destination: Table into which data is to be loaded. - :type source_uris: sequence of string - :param source_uris: URIs of data files to be loaded; in format - ``gs:///``. + :type job_id: str + :param job_id: (Optional) Name of the job. + + :type job_id_prefix: str or ``NoneType`` + :param job_id_prefix: (Optional) the user-provided prefix for a + randomly generated job ID. This parameter will be + ignored if a ``job_id`` is also given. + + :type job_config: :class:`google.cloud.bigquery.LoadJobConfig` + :param job_config: (Optional) Extra configuration options for the job. + + :type retry: :class:`google.api_core.retry.Retry` + :param retry: (Optional) How to retry the RPC. - :rtype: :class:`google.cloud.bigquery.job.LoadTableFromStorageJob` - :returns: a new ``LoadTableFromStorageJob`` instance + :rtype: :class:`google.cloud.bigquery.LoadJob` + :returns: a new ``LoadJob`` instance """ - return LoadTableFromStorageJob(job_name, destination, source_uris, - client=self) + job_id = _make_job_id(job_id, job_id_prefix) + if isinstance(source_uris, six.string_types): + source_uris = [source_uris] + job = LoadJob(job_id, source_uris, destination, self, job_config) + job._begin(retry=retry) + return job + + def load_table_from_file(self, file_obj, destination, + rewind=False, + size=None, + num_retries=_DEFAULT_NUM_RETRIES, + job_id=None, job_id_prefix=None, job_config=None): + """Upload the contents of this table from a file-like object. + + Like load_table_from_uri, this creates, starts and returns + a ``LoadJob``. + + :type file_obj: file + :param file_obj: A file handle opened in binary mode for reading. + + :type destination: :class:`google.cloud.bigquery.table.TableReference` + :param destination: Table into which data is to be loaded. + + :type rewind: bool + :param rewind: If True, seek to the beginning of the file handle before + reading the file. + + :type size: int + :param size: The number of bytes to read from the file handle. + If size is ``None`` or large, resumable upload will be + used. Otherwise, multipart upload will be used. - def copy_table(self, job_name, destination, *sources): - """Construct a job for copying one or more tables into another table. + :type num_retries: int + :param num_retries: Number of upload retries. Defaults to 6. + + :type job_id: str + :param job_id: (Optional) Name of the job. + + :type job_id_prefix: str or ``NoneType`` + :param job_id_prefix: (Optional) the user-provided prefix for a + randomly generated job ID. This parameter will be + ignored if a ``job_id`` is also given. + + :type job_config: :class:`google.cloud.bigquery.LoadJobConfig` + :param job_config: (Optional) Extra configuration options for the job. + + :rtype: :class:`~google.cloud.bigquery.LoadJob` + + :returns: the job instance used to load the data (e.g., for + querying status). + :raises: :class:`ValueError` if ``size`` is not passed in and can not + be determined, or if the ``file_obj`` can be detected to be + a file opened in text mode. 
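+
+        Example (a sketch mirroring the README; assumes ``csv_file.csv``
+        exists locally and that ``'dataset_name'`` already exists)::
+
+            from google.cloud.bigquery import LoadJobConfig
+            from google.cloud.bigquery import SchemaField
+
+            config = LoadJobConfig()
+            config.skip_leading_rows = 1
+            config.schema = [
+                SchemaField('full_name', 'STRING', mode='required'),
+                SchemaField('age', 'INTEGER', mode='required'),
+            ]
+            table_ref = client.dataset('dataset_name').table('table_name')
+            with open('csv_file.csv', 'rb') as source_file:
+                job = client.load_table_from_file(
+                    source_file, table_ref, job_config=config)
+            job.result()  # Waits for the load job to complete.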
+ """ + job_id = _make_job_id(job_id, job_id_prefix) + job = LoadJob(job_id, None, destination, self, job_config) + job_resource = job._build_resource() + if rewind: + file_obj.seek(0, os.SEEK_SET) + _check_mode(file_obj) + try: + if size is None or size >= _MAX_MULTIPART_SIZE: + response = self._do_resumable_upload( + file_obj, job_resource, num_retries) + else: + response = self._do_multipart_upload( + file_obj, job_resource, size, num_retries) + except resumable_media.InvalidResponse as exc: + raise exceptions.from_http_response(exc.response) + return self.job_from_resource(response.json()) + + def _do_resumable_upload(self, stream, metadata, num_retries): + """Perform a resumable upload. + + :type stream: IO[bytes] + :param stream: A bytes IO object open for reading. + + :type metadata: dict + :param metadata: The metadata associated with the upload. + + :type num_retries: int + :param num_retries: Number of upload retries. (Deprecated: This + argument will be removed in a future release.) + + :rtype: :class:`~requests.Response` + :returns: The "200 OK" response object returned after the final chunk + is uploaded. + """ + upload, transport = self._initiate_resumable_upload( + stream, metadata, num_retries) + + while not upload.finished: + response = upload.transmit_next_chunk(transport) + + return response + + def _initiate_resumable_upload(self, stream, metadata, num_retries): + """Initiate a resumable upload. + + :type stream: IO[bytes] + :param stream: A bytes IO object open for reading. + + :type metadata: dict + :param metadata: The metadata associated with the upload. + + :type num_retries: int + :param num_retries: Number of upload retries. (Deprecated: This + argument will be removed in a future release.) + + :rtype: tuple + :returns: + Pair of + + * The :class:`~google.resumable_media.requests.ResumableUpload` + that was created + * The ``transport`` used to initiate the upload. + """ + chunk_size = _DEFAULT_CHUNKSIZE + transport = self._http + headers = _get_upload_headers(self._connection.USER_AGENT) + upload_url = _RESUMABLE_URL_TEMPLATE.format(project=self.project) + # TODO: modify ResumableUpload to take a retry.Retry object + # that it can use for the initial RPC. + upload = ResumableUpload(upload_url, chunk_size, headers=headers) + + if num_retries is not None: + upload._retry_strategy = resumable_media.RetryStrategy( + max_retries=num_retries) + + upload.initiate( + transport, stream, metadata, _GENERIC_CONTENT_TYPE, + stream_final=False) + + return upload, transport + + def _do_multipart_upload(self, stream, metadata, size, num_retries): + """Perform a multipart upload. + + :type stream: IO[bytes] + :param stream: A bytes IO object open for reading. + + :type metadata: dict + :param metadata: The metadata associated with the upload. + + :type size: int + :param size: The number of bytes to be uploaded (which will be read + from ``stream``). If not provided, the upload will be + concluded once ``stream`` is exhausted (or :data:`None`). + + :type num_retries: int + :param num_retries: Number of upload retries. (Deprecated: This + argument will be removed in a future release.) + + :rtype: :class:`~requests.Response` + :returns: The "200 OK" response object returned after the multipart + upload request. + :raises: :exc:`ValueError` if the ``stream`` has fewer than ``size`` + bytes remaining. 
+ """ + data = stream.read(size) + if len(data) < size: + msg = _READ_LESS_THAN_SIZE.format(size, len(data)) + raise ValueError(msg) + + headers = _get_upload_headers(self._connection.USER_AGENT) + + upload_url = _MULTIPART_URL_TEMPLATE.format(project=self.project) + upload = MultipartUpload(upload_url, headers=headers) + + if num_retries is not None: + upload._retry_strategy = resumable_media.RetryStrategy( + max_retries=num_retries) + + response = upload.transmit( + self._http, data, metadata, _GENERIC_CONTENT_TYPE) + + return response + + def copy_table(self, sources, destination, job_id=None, job_id_prefix=None, + job_config=None, retry=DEFAULT_RETRY): + """Start a job for copying one or more tables into another table. See https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.copy - :type job_name: str - :param job_name: Name of the job. + :type sources: One of: + :class:`~google.cloud.bigquery.TableReference` + sequence of + :class:`~google.cloud.bigquery.TableReference` + :param sources: Table or tables to be copied. - :type destination: :class:`google.cloud.bigquery.table.Table` + + :type destination: :class:`google.cloud.bigquery.table.TableReference` :param destination: Table into which data is to be copied. - :type sources: sequence of :class:`google.cloud.bigquery.table.Table` - :param sources: tables to be copied. + :type job_id: str + :param job_id: (Optional) The ID of the job. + + :type job_id_prefix: str or ``NoneType`` + :param job_id_prefix: (Optional) the user-provided prefix for a + randomly generated job ID. This parameter will be + ignored if a ``job_id`` is also given. - :rtype: :class:`google.cloud.bigquery.job.CopyJob` + :type job_config: :class:`google.cloud.bigquery.CopyJobConfig` + :param job_config: (Optional) Extra configuration options for the job. + + :type retry: :class:`google.api_core.retry.Retry` + :param retry: (Optional) How to retry the RPC. + + :rtype: :class:`google.cloud.bigquery.CopyJob` :returns: a new ``CopyJob`` instance """ - return CopyJob(job_name, destination, sources, client=self) + job_id = _make_job_id(job_id, job_id_prefix) - def extract_table_to_storage(self, job_name, source, *destination_uris): - """Construct a job for extracting a table into Cloud Storage files. + if not isinstance(sources, collections.Sequence): + sources = [sources] + job = CopyJob(job_id, sources, destination, client=self, + job_config=job_config) + job._begin(retry=retry) + return job + + def extract_table( + self, source, destination_uris, job_config=None, job_id=None, + job_id_prefix=None, retry=DEFAULT_RETRY): + """Start a job to extract a table into Cloud Storage files. See https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.extract - :type job_name: str - :param job_name: Name of the job. - - :type source: :class:`google.cloud.bigquery.table.Table` + :type source: :class:`google.cloud.bigquery.TableReference` :param source: table to be extracted. - :type destination_uris: sequence of string - :param destination_uris: URIs of CloudStorage file(s) into which - table data is to be extracted; in format - ``gs:///``. + :type destination_uris: One of: + str or + sequence of str + :param destination_uris: + URIs of Cloud Storage file(s) into which table data is to be + extracted; in format ``gs:///``. + + :type kwargs: dict + :param kwargs: Additional keyword arguments. + + :type job_id: str + :param job_id: (Optional) The ID of the job. 
- :rtype: :class:`google.cloud.bigquery.job.ExtractTableToStorageJob` - :returns: a new ``ExtractTableToStorageJob`` instance + :type job_id_prefix: str or ``NoneType`` + :param job_id_prefix: (Optional) the user-provided prefix for a + randomly generated job ID. This parameter will be + ignored if a ``job_id`` is also given. + + :type job_config: :class:`google.cloud.bigquery.ExtractJobConfig` + :param job_config: (Optional) Extra configuration options for the job. + + :type retry: :class:`google.api_core.retry.Retry` + :param retry: (Optional) How to retry the RPC. + + :rtype: :class:`google.cloud.bigquery.ExtractJob` + :returns: a new ``ExtractJob`` instance """ - return ExtractTableToStorageJob(job_name, source, destination_uris, - client=self) + job_id = _make_job_id(job_id, job_id_prefix) + + if isinstance(destination_uris, six.string_types): + destination_uris = [destination_uris] - def run_async_query(self, job_name, query, - udf_resources=(), query_parameters=()): - """Construct a job for running a SQL query asynchronously. + job = ExtractJob( + job_id, source, destination_uris, client=self, + job_config=job_config) + job._begin(retry=retry) + return job + + def query(self, query, job_config=None, job_id=None, job_id_prefix=None, + retry=DEFAULT_RETRY): + """Start a job that runs a SQL query. See https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query - :type job_name: str - :param job_name: Name of the job. - :type query: str - :param query: SQL query to be executed + :param query: + SQL query to be executed. Defaults to the standard SQL dialect. + Use the ``job_config`` parameter to change dialects. + + :type job_config: :class:`google.cloud.bigquery.QueryJobConfig` + :param job_config: (Optional) Extra configuration options for the job. - :type udf_resources: tuple - :param udf_resources: An iterable of - :class:`google.cloud.bigquery._helpers.UDFResource` - (empty by default) + :type job_id: str + :param job_id: (Optional) ID to use for the query job. + + :type job_id_prefix: str or ``NoneType`` + :param job_id_prefix: (Optional) the user-provided prefix for a + randomly generated job ID. This parameter will be + ignored if a ``job_id`` is also given. - :type query_parameters: tuple - :param query_parameters: - An iterable of - :class:`google.cloud.bigquery._helpers.AbstractQueryParameter` - (empty by default) + :type retry: :class:`google.api_core.retry.Retry` + :param retry: (Optional) How to retry the RPC. - :rtype: :class:`google.cloud.bigquery.job.QueryJob` + :rtype: :class:`google.cloud.bigquery.QueryJob` :returns: a new ``QueryJob`` instance """ - return QueryJob(job_name, query, client=self, - udf_resources=udf_resources, - query_parameters=query_parameters) + job_id = _make_job_id(job_id, job_id_prefix) + job = QueryJob(job_id, query, client=self, job_config=job_config) + job._begin(retry=retry) + return job - def run_sync_query(self, query, udf_resources=(), query_parameters=()): - """Run a SQL query synchronously. + def create_rows(self, table, rows, selected_fields=None, **kwargs): + """API call: insert table data via a POST request + + See + https://cloud.google.com/bigquery/docs/reference/rest/v2/tabledata/insertAll + + :type table: One of: + :class:`~google.cloud.bigquery.Table` + :class:`~google.cloud.bigquery.TableReference` + :param table: the destination table for the row data, or a reference + to it. + + :type rows: One of: + list of tuples + list of dictionaries + :param rows: Row data to be inserted. 
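# Usage sketch for ``extract_table`` above. The bucket path is hypothetical, and
# setting ``compression`` / ``destination_format`` assumes ExtractJobConfig
# exposes the Compression and DestinationFormat values defined later in job.py.
from google.cloud import bigquery
from google.cloud.bigquery import ExtractJobConfig

client = bigquery.Client()
extract_config = ExtractJobConfig()
extract_config.compression = 'GZIP'
extract_config.destination_format = 'NEWLINE_DELIMITED_JSON'

extract_job = client.extract_table(
    client.dataset('my_dataset').table('my_table'),
    'gs://my-bucket/exports/my_table-*.json.gz',  # a str or a sequence of str
    job_config=extract_config)
extract_job.result()  # block until the extract job completes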
If a list of tuples is given, + each tuple should contain data for each schema field on + the current table and in the same order as the schema + fields. If a list of dictionaries is given, the keys must + include all required fields in the schema. Keys which do + not correspond to a field in the schema are ignored. + + :type selected_fields: + list of :class:`~google.cloud.bigquery.SchemaField` + :param selected_fields: + The fields to return. Required if ``table`` is a + :class:`~google.cloud.bigquery.TableReference`. + + :type kwargs: dict + :param kwargs: Keyword arguments to + `~google.cloud.bigquery.Client.create_rows_json` + + :rtype: list of mappings + :returns: One mapping per row with insert errors: the "index" key + identifies the row, and the "errors" key contains a list + of the mappings describing one or more problems with the + row. + :raises: ValueError if table's schema is not set + """ + if selected_fields is not None: + schema = selected_fields + elif isinstance(table, TableReference): + raise ValueError('need selected_fields with TableReference') + elif isinstance(table, Table): + if len(table._schema) == 0: + raise ValueError(_TABLE_HAS_NO_SCHEMA) + schema = table.schema + else: + raise TypeError('table should be Table or TableReference') + + json_rows = [] + + for index, row in enumerate(rows): + if isinstance(row, dict): + row = _row_from_mapping(row, schema) + json_row = {} + + for field, value in zip(schema, row): + converter = _SCALAR_VALUE_TO_JSON_ROW.get(field.field_type) + if converter is not None: # STRING doesn't need converting + value = converter(value) + json_row[field.name] = value + + json_rows.append(json_row) + + return self.create_rows_json(table, json_rows, **kwargs) + + def create_rows_json(self, table, json_rows, row_ids=None, + skip_invalid_rows=None, ignore_unknown_values=None, + template_suffix=None, retry=DEFAULT_RETRY): + """API call: insert table data via a POST request + + See + https://cloud.google.com/bigquery/docs/reference/rest/v2/tabledata/insertAll + + :type table: One of: + :class:`~google.cloud.bigquery.Table` + :class:`~google.cloud.bigquery.TableReference` + :param table: the destination table for the row data, or a reference + to it. + + :type json_rows: list of dictionaries + :param json_rows: Row data to be inserted. Keys must match the table + schema fields and values must be JSON-compatible + representations. + + :type row_ids: list of string + :param row_ids: (Optional) Unique ids, one per row being inserted. + If omitted, unique IDs are created. + + :type skip_invalid_rows: bool + :param skip_invalid_rows: (Optional) Insert all valid rows of a + request, even if invalid rows exist. + The default value is False, which causes + the entire request to fail if any invalid + rows exist. + + :type ignore_unknown_values: bool + :param ignore_unknown_values: (Optional) Accept rows that contain + values that do not match the schema. + The unknown values are ignored. Default + is False, which treats unknown values as + errors. + + :type template_suffix: str + :param template_suffix: + (Optional) treat ``name`` as a template table and provide a suffix. + BigQuery will create the table `` + `` based + on the schema of the template table. See + https://cloud.google.com/bigquery/streaming-data-into-bigquery#template-tables + + :type retry: :class:`google.api_core.retry.Retry` + :param retry: (Optional) How to retry the RPC. 
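# Streaming-insert sketch for ``create_rows`` above: rows may be tuples in
# schema-field order or mappings keyed by field name. The table and values are
# illustrative; a bare TableReference would also need ``selected_fields``.
from google.cloud import bigquery

client = bigquery.Client()
table = client.get_table(client.dataset('my_dataset').table('my_table'))

rows_to_insert = [
    ('Alice Cooper', 41),                   # tuple, in schema-field order
    {'full_name': 'Bob Hope', 'age': 52},   # or a mapping keyed by field name
]
errors = client.create_rows(table, rows_to_insert)
if errors:
    print('Some rows were rejected:', errors)  # one mapping per failed row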
+ + :rtype: list of mappings + :returns: One mapping per row with insert errors: the "index" key + identifies the row, and the "errors" key contains a list + of the mappings describing one or more problems with the + row. + """ + rows_info = [] + data = {'rows': rows_info} + + for index, row in enumerate(json_rows): + info = {'json': row} + if row_ids is not None: + info['insertId'] = row_ids[index] + else: + info['insertId'] = str(uuid.uuid4()) + rows_info.append(info) + + if skip_invalid_rows is not None: + data['skipInvalidRows'] = skip_invalid_rows + + if ignore_unknown_values is not None: + data['ignoreUnknownValues'] = ignore_unknown_values + + if template_suffix is not None: + data['templateSuffix'] = template_suffix + + # We can always retry, because every row has an insert ID. + response = self._call_api( + retry, + method='POST', + path='%s/insertAll' % table.path, + data=data) + errors = [] + + for error in response.get('insertErrors', ()): + errors.append({'index': int(error['index']), + 'errors': error['errors']}) + + return errors + + def query_rows(self, query, job_config=None, job_id=None, timeout=None, + retry=DEFAULT_RETRY): + """Start a query job and wait for the results. + + See + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query :type query: str - :param query: SQL query to be executed + :param query: + SQL query to be executed. Defaults to the standard SQL dialect. + Use the ``job_config`` parameter to change dialects. - :type udf_resources: tuple - :param udf_resources: An iterable of - :class:`google.cloud.bigquery._helpers.UDFResource` - (empty by default) + :type job_config: :class:`google.cloud.bigquery.QueryJobConfig` + :param job_config: (Optional) Extra configuration options for the job. - :type query_parameters: tuple - :param query_parameters: - An iterable of - :class:`google.cloud.bigquery._helpers.AbstractQueryParameter` - (empty by default) + :type job_id: str + :param job_id: (Optional) ID to use for the query job. - :rtype: :class:`google.cloud.bigquery.query.QueryResults` - :returns: a new ``QueryResults`` instance + :type timeout: float + :param timeout: + (Optional) How long (in seconds) to wait for job to complete + before raising a :class:`TimeoutError`. + + :rtype: :class:`~google.api_core.page_iterator.Iterator` + :returns: + Iterator of row data :class:`tuple`s. During each page, the + iterator will have the ``total_rows`` attribute set, which counts + the total number of rows **in the result set** (this is distinct + from the total number of rows in the current page: + ``iterator.page.num_items``). + + :raises: :class:`~google.cloud.exceptions.GoogleCloudError` if the job + failed or :class:`TimeoutError` if the job did not complete in the + given timeout. + """ + job = self.query( + query, job_config=job_config, job_id=job_id, retry=retry) + return job.result(timeout=timeout) + + def list_rows(self, table, selected_fields=None, max_results=None, + page_token=None, start_index=None, retry=DEFAULT_RETRY): + """List the rows of the table. + + See + https://cloud.google.com/bigquery/docs/reference/rest/v2/tabledata/list + + .. note:: + + This method assumes that the provided schema is up-to-date with the + schema as defined on the back-end: if the two schemas are not + identical, the values returned may be incomplete. To ensure that the + local copy of the schema is up-to-date, call ``client.get_table``. 
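# Sketch of the lower-level ``create_rows_json`` completed above, with explicit
# insert IDs so a retried request does not duplicate rows; IDs and values are
# illustrative.
from google.cloud import bigquery

client = bigquery.Client()
table_ref = client.dataset('my_dataset').table('my_table')

json_rows = [
    {'full_name': 'Carol Danvers', 'age': 33},
    {'full_name': 'Dan Dare', 'age': 61},
]
errors = client.create_rows_json(
    table_ref,
    json_rows,
    row_ids=['row-0', 'row-1'],   # stable IDs keep the insert idempotent
    skip_invalid_rows=True)       # keep good rows even if others are invalid
if errors:
    print('Some rows were rejected:', errors)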
+ + :type table: One of: + :class:`~google.cloud.bigquery.Table` + :class:`~google.cloud.bigquery.TableReference` + :param table: the table to list, or a reference to it. + + :type selected_fields: + list of :class:`~google.cloud.bigquery.SchemaField` + :param selected_fields: + The fields to return. Required if ``table`` is a + :class:`~google.cloud.bigquery.TableReference`. + + :type max_results: int + :param max_results: maximum number of rows to return. + + :type page_token: str + :param page_token: (Optional) Token representing a cursor into the + table's rows. + + :type start_index: int + :param page_token: (Optional) The zero-based index of the starting + row to read. + + :type retry: :class:`google.api_core.retry.Retry` + :param retry: (Optional) How to retry the RPC. + + :rtype: :class:`~google.api_core.page_iterator.Iterator` + :returns: Iterator of row data :class:`tuple`s. During each page, the + iterator will have the ``total_rows`` attribute set, + which counts the total number of rows **in the table** + (this is distinct from the total number of rows in the + current page: ``iterator.page.num_items``). + + """ + if selected_fields is not None: + schema = selected_fields + elif isinstance(table, TableReference): + raise ValueError('need selected_fields with TableReference') + elif isinstance(table, Table): + if len(table._schema) == 0: + raise ValueError(_TABLE_HAS_NO_SCHEMA) + schema = table.schema + else: + raise TypeError('table should be Table or TableReference') + + params = {} + if selected_fields is not None: + params['selectedFields'] = ','.join( + field.name for field in selected_fields) + + if start_index is not None: + params['startIndex'] = start_index + + iterator = page_iterator.HTTPIterator( + client=self, + api_request=functools.partial(self._call_api, retry), + path='%s/data' % (table.path,), + item_to_value=_item_to_row, + items_key='rows', + page_token=page_token, + next_token='pageToken', + max_results=max_results, + page_start=_rows_page_start, + extra_params=params) + iterator.schema = schema + iterator._field_to_index = _field_to_index_mapping(schema) + return iterator + + def list_partitions(self, table, retry=DEFAULT_RETRY): + """List the partitions in a table. + + :type table: One of: + :class:`~google.cloud.bigquery.Table` + :class:`~google.cloud.bigquery.TableReference` + :param table: the table to list, or a reference to it. + + :type retry: :class:`google.api_core.retry.Retry` + :param retry: (Optional) How to retry the RPC. + + :rtype: list + :returns: a list of time partitions """ - return QueryResults(query, client=self, - udf_resources=udf_resources, - query_parameters=query_parameters) + config = QueryJobConfig() + config.use_legacy_sql = True # required for '$' syntax + rows = self.query_rows( + 'SELECT partition_id from [%s:%s.%s$__PARTITIONS_SUMMARY__]' % + (table.project, table.dataset_id, table.table_id), + job_config=config, + retry=retry) + return [row[0] for row in rows] # pylint: disable=unused-argument @@ -422,7 +1274,7 @@ def _item_to_dataset(iterator, resource): :rtype: :class:`.Dataset` :returns: The next dataset in the page. """ - return Dataset.from_api_repr(resource, iterator.client) + return Dataset.from_api_repr(resource) def _item_to_job(iterator, resource): @@ -438,3 +1290,72 @@ def _item_to_job(iterator, resource): :returns: The next job in the page. """ return iterator.client.job_from_resource(resource) + + +def _item_to_table(iterator, resource): + """Convert a JSON table to the native object. 
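# Paging sketch for ``list_rows`` and ``list_partitions`` above. The table is a
# placeholder; ``total_rows`` is populated once the first page of results has
# been fetched.
from google.cloud import bigquery

client = bigquery.Client()
table = client.get_table(client.dataset('my_dataset').table('my_table'))

rows = client.list_rows(table, max_results=10, start_index=100)
for row in rows:
    print(row)
print('%d rows in the whole table' % rows.total_rows)

# Partition IDs for a time-partitioned table come back as plain strings,
# e.g. ['20170101', '20170102'].
partition_ids = client.list_partitions(table)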
+ + :type iterator: :class:`~google.api_core.page_iterator.Iterator` + :param iterator: The iterator that is currently in use. + + :type resource: dict + :param resource: An item to be converted to a table. + + :rtype: :class:`~google.cloud.bigquery.Table` + :returns: The next table in the page. + """ + return Table.from_api_repr(resource) + + +def _make_job_id(job_id, prefix=None): + """Construct an ID for a new job. + + :type job_id: str or ``NoneType`` + :param job_id: the user-provided job ID + + :type prefix: str or ``NoneType`` + :param prefix: (Optional) the user-provided prefix for a job ID + + :rtype: str + :returns: A job ID + """ + if job_id is not None: + return job_id + elif prefix is not None: + return str(prefix) + str(uuid.uuid4()) + else: + return str(uuid.uuid4()) + + +def _check_mode(stream): + """Check that a stream was opened in read-binary mode. + + :type stream: IO[bytes] + :param stream: A bytes IO object open for reading. + + :raises: :exc:`ValueError` if the ``stream.mode`` is a valid attribute + and is not among ``rb``, ``r+b`` or ``rb+``. + """ + mode = getattr(stream, 'mode', None) + + if mode is not None and mode not in ('rb', 'r+b', 'rb+'): + raise ValueError( + "Cannot upload files opened in text mode: use " + "open(filename, mode='rb') or open(filename, mode='r+b')") + + +def _get_upload_headers(user_agent): + """Get the headers for an upload request. + + :type user_agent: str + :param user_agent: The user-agent for requests. + + :rtype: dict + :returns: The headers to be used for the request. + """ + return { + 'Accept': 'application/json', + 'Accept-Encoding': 'gzip, deflate', + 'User-Agent': user_agent, + 'content-type': 'application/json', + } diff --git a/bigquery/google/cloud/bigquery/dataset.py b/bigquery/google/cloud/bigquery/dataset.py index 95d238e271fa..ef1b59f869c6 100644 --- a/bigquery/google/cloud/bigquery/dataset.py +++ b/bigquery/google/cloud/bigquery/dataset.py @@ -13,15 +13,16 @@ # limitations under the License. """Define API Datasets.""" + +from __future__ import absolute_import + import six -from google.api_core import page_iterator from google.cloud._helpers import _datetime_from_microseconds -from google.cloud.exceptions import NotFound -from google.cloud.bigquery.table import Table +from google.cloud.bigquery.table import TableReference -class AccessGrant(object): +class AccessEntry(object): """Represent grant of an access role to an entity. Every entry in the access list will have exactly one of @@ -76,7 +77,7 @@ def __init__(self, role, entity_type, entity_id): self.entity_id = entity_id def __eq__(self, other): - if not isinstance(other, AccessGrant): + if not isinstance(other, AccessEntry): return NotImplemented return ( self.role == other.role and @@ -87,47 +88,131 @@ def __ne__(self, other): return not self == other def __repr__(self): - return '' % ( + return '' % ( self.role, self.entity_type, self.entity_id) -class Dataset(object): - """Datasets are containers for tables. +class DatasetReference(object): + """DatasetReferences are pointers to datasets. See https://cloud.google.com/bigquery/docs/reference/rest/v2/datasets - :type name: str - :param name: the name of the dataset + :type project: str + :param project: the ID of the project - :type client: :class:`google.cloud.bigquery.client.Client` - :param client: A client which holds credentials and project configuration - for the dataset (which requires a project). 
+ :type dataset_id: str + :param dataset_id: the ID of the dataset + """ - :type access_grants: list of :class:`AccessGrant` - :param access_grants: roles granted to entities for this dataset + def __init__(self, project, dataset_id): + if not isinstance(project, six.string_types): + raise ValueError("Pass a string for project") + if not isinstance(dataset_id, six.string_types): + raise ValueError("Pass a string for dataset_id") + self._project = project + self._dataset_id = dataset_id - :type project: str - :param project: (Optional) project ID for the dataset (defaults to - the project of the client). - """ + @property + def project(self): + """Project ID of the dataset. + + :rtype: str + :returns: the project ID. + """ + return self._project + + @property + def dataset_id(self): + """Dataset ID. + + :rtype: str + :returns: the dataset ID. + """ + return self._dataset_id + + @property + def path(self): + """URL path for the dataset's APIs. + + :rtype: str + :returns: the path based on project and dataset name. + """ + return '/projects/%s/datasets/%s' % (self.project, self.dataset_id) + + def table(self, table_id): + """Constructs a TableReference. + + :type table_id: str + :param table_id: the ID of the table. + + :rtype: :class:`google.cloud.bigquery.TableReference` + :returns: a TableReference for a table in this dataset. + """ + return TableReference(self, table_id) + + @classmethod + def from_api_repr(cls, resource): + project = resource['projectId'] + dataset_id = resource['datasetId'] + return cls(project, dataset_id) + + def to_api_repr(self): + return { + 'projectId': self._project, + 'datasetId': self._dataset_id, + } + + def _key(self): + """A tuple key that uniquely describes this field. + + Used to compute this instance's hashcode and evaluate equality. + + Returns: + tuple: The contents of this :class:`.DatasetReference`. + """ + return ( + self._project, + self._dataset_id, + ) + + def __eq__(self, other): + if not isinstance(other, DatasetReference): + return NotImplemented + return self._key() == other._key() + + def __ne__(self, other): + return not self == other + + def __hash__(self): + return hash(self._key()) + + def __repr__(self): + return 'DatasetReference{}'.format(self._key()) - _access_grants = None - def __init__(self, name, client, access_grants=(), project=None): - self.name = name - self._client = client - self._properties = {} - # Let the @property do validation. - self.access_grants = access_grants - self._project = project or client.project +class Dataset(object): + """Datasets are containers for tables. + + See + https://cloud.google.com/bigquery/docs/reference/rest/v2/datasets + + :type dataset_ref: :class:`~google.cloud.bigquery.DatasetReference` + :param dataset_ref: a pointer to a dataset + """ + + def __init__(self, dataset_ref): + self._project = dataset_ref.project + self._dataset_id = dataset_ref.dataset_id + self._properties = {'labels': {}} + self._access_entries = () @property def project(self): """Project bound to the dataset. :rtype: str - :returns: the project (derived from the client). + :returns: the project. """ return self._project @@ -136,32 +221,32 @@ def path(self): """URL path for the dataset's APIs. :rtype: str - :returns: the path based on project and dataste name. + :returns: the path based on project and dataset ID. """ - return '/projects/%s/datasets/%s' % (self.project, self.name) + return '/projects/%s/datasets/%s' % (self.project, self.dataset_id) @property - def access_grants(self): - """Dataset's access grants. 
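# Sketch of the new reference types introduced above: a DatasetReference is a
# cheap pointer (project + dataset ID) that never calls the API; the IDs are
# placeholders.
from google.cloud.bigquery import Dataset, DatasetReference

dataset_ref = DatasetReference('my-project', 'my_dataset')
table_ref = dataset_ref.table('my_table')   # a TableReference, no API request

dataset = Dataset(dataset_ref)              # local object; created via the client

# References compare, hash, and serialize by (project, dataset_id).
assert dataset_ref == DatasetReference('my-project', 'my_dataset')
assert dataset_ref.to_api_repr() == {
    'projectId': 'my-project',
    'datasetId': 'my_dataset',
}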
+ def access_entries(self): + """Dataset's access entries. - :rtype: list of :class:`AccessGrant` + :rtype: list of :class:`AccessEntry` :returns: roles granted to entities for this dataset """ - return list(self._access_grants) + return list(self._access_entries) - @access_grants.setter - def access_grants(self, value): - """Update dataset's access grants + @access_entries.setter + def access_entries(self, value): + """Update dataset's access entries - :type value: list of :class:`AccessGrant` + :type value: list of :class:`~google.cloud.bigquery.AccessEntry` :param value: roles granted to entities for this dataset :raises: TypeError if 'value' is not a sequence, or ValueError if - any item in the sequence is not an AccessGrant + any item in the sequence is not an AccessEntry """ - if not all(isinstance(field, AccessGrant) for field in value): - raise ValueError('Values must be AccessGrant instances') - self._access_grants = tuple(value) + if not all(isinstance(field, AccessEntry) for field in value): + raise ValueError('Values must be AccessEntry instances') + self._access_entries = tuple(value) @property def created(self): @@ -177,7 +262,16 @@ def created(self): @property def dataset_id(self): - """ID for the dataset resource. + """Dataset ID. + + :rtype: str + :returns: the dataset ID. + """ + return self._dataset_id + + @property + def full_dataset_id(self): + """ID for the dataset resource, in the form "project_id:dataset_id". :rtype: str, or ``NoneType`` :returns: the ID (None until set from the server). @@ -302,68 +396,75 @@ def location(self, value): raise ValueError("Pass a string, or None") self._properties['location'] = value + @property + def labels(self): + """Labels for the dataset. + + This method always returns a dict. To change a dataset's labels, + modify the dict, then call ``Client.update_dataset``. To delete a + label, set its value to ``None`` before updating. + + :rtype: dict, {str -> str} + :returns: A dict of the the dataset's labels. + """ + return self._properties['labels'] + + @labels.setter + def labels(self, value): + """Update labels for the dataset. + + :type value: dict, {str -> str} + :param value: new labels + + :raises: ValueError for invalid value types. + """ + if not isinstance(value, dict): + raise ValueError("Pass a dict") + self._properties['labels'] = value + @classmethod - def from_api_repr(cls, resource, client): + def from_api_repr(cls, resource): """Factory: construct a dataset given its API representation :type resource: dict :param resource: dataset resource representation returned from the API - :type client: :class:`google.cloud.bigquery.client.Client` - :param client: Client which holds credentials and project - configuration for the dataset. - - :rtype: :class:`google.cloud.bigquery.dataset.Dataset` + :rtype: :class:`~google.cloud.bigquery.Dataset` :returns: Dataset parsed from ``resource``. """ - if ('datasetReference' not in resource or - 'datasetId' not in resource['datasetReference']): + dsr = resource.get('datasetReference') + if dsr is None or 'datasetId' not in dsr: raise KeyError('Resource lacks required identity information:' '["datasetReference"]["datasetId"]') - name = resource['datasetReference']['datasetId'] - dataset = cls(name, client=client) + dataset_id = dsr['datasetId'] + dataset = cls(DatasetReference(dsr['projectId'], dataset_id)) dataset._set_properties(resource) return dataset - def _require_client(self, client): - """Check client or verify over-ride. 
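# Sketch of the renamed access entries and the new ``labels`` mapping on
# Dataset. The group and user addresses are invented, and the entity-type
# strings are assumed to follow the REST API's access fields; persisting the
# change goes through ``Client.update_dataset`` as the labels docstring notes.
from google.cloud.bigquery import AccessEntry, Dataset, DatasetReference

dataset = Dataset(DatasetReference('my-project', 'my_dataset'))

dataset.access_entries = [
    AccessEntry('READER', 'groupByEmail', 'data-readers@example.com'),
    AccessEntry('WRITER', 'userByEmail', 'etl-robot@example.com'),
]

labels = dataset.labels        # always a dict, never None
labels['env'] = 'staging'
labels['deprecated'] = None    # a None value deletes the label on update
dataset.labels = labels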
- - :type client: :class:`~google.cloud.bigquery.client.Client` or - ``NoneType`` - :param client: the client to use. If not passed, falls back to the - ``client`` stored on the current dataset. - - :rtype: :class:`google.cloud.bigquery.client.Client` - :returns: The client passed in or the currently bound client. - """ - if client is None: - client = self._client - return client - @staticmethod - def _parse_access_grants(access): - """Parse a resource fragment into a set of access grants. + def _parse_access_entries(access): + """Parse a resource fragment into a set of access entries. ``role`` augments the entity type and present **unless** the entity type is ``view``. :type access: list of mappings - :param access: each mapping represents a single access grant. + :param access: each mapping represents a single access entry. - :rtype: list of :class:`AccessGrant` - :returns: a list of parsed grants. - :raises: :class:`ValueError` if a grant in ``access`` has more keys + :rtype: list of :class:`~google.cloud.bigquery.AccessEntry` + :returns: a list of parsed entries. + :raises: :class:`ValueError` if a entry in ``access`` has more keys than ``role`` and one additional key. """ result = [] - for grant in access: - grant = grant.copy() - role = grant.pop('role', None) - entity_type, entity_id = grant.popitem() - if len(grant) != 0: - raise ValueError('Grant has unexpected keys remaining.', grant) + for entry in access: + entry = entry.copy() + role = entry.pop('role', None) + entity_type, entity_id = entry.popitem() + if len(entry) != 0: + raise ValueError('Entry has unexpected keys remaining.', entry) result.append( - AccessGrant(role, entity_type, entity_id)) + AccessEntry(role, entity_type, entity_id)) return result def _set_properties(self, api_response): @@ -375,7 +476,7 @@ def _set_properties(self, api_response): self._properties.clear() cleaned = api_response.copy() access = cleaned.pop('access', ()) - self.access_grants = self._parse_access_grants(access) + self.access_entries = self._parse_access_entries(access) if 'creationTime' in cleaned: cleaned['creationTime'] = float(cleaned['creationTime']) if 'lastModifiedTime' in cleaned: @@ -383,15 +484,17 @@ def _set_properties(self, api_response): if 'defaultTableExpirationMs' in cleaned: cleaned['defaultTableExpirationMs'] = int( cleaned['defaultTableExpirationMs']) + if 'labels' not in cleaned: + cleaned['labels'] = {} self._properties.update(cleaned) def _build_access_resource(self): - """Generate a resource fragment for dataset's access grants.""" + """Generate a resource fragment for dataset's access entries.""" result = [] - for grant in self.access_grants: - info = {grant.entity_type: grant.entity_id} - if grant.role is not None: - info['role'] = grant.role + for entry in self.access_entries: + info = {entry.entity_type: entry.entity_id} + if entry.role is not None: + info['role'] = entry.role result.append(info) return result @@ -399,7 +502,7 @@ def _build_resource(self): """Generate a resource for ``create`` or ``update``.""" resource = { 'datasetReference': { - 'projectId': self.project, 'datasetId': self.name}, + 'projectId': self.project, 'datasetId': self.dataset_id}, } if self.default_table_expiration_ms is not None: value = self.default_table_expiration_ms @@ -414,194 +517,20 @@ def _build_resource(self): if self.location is not None: resource['location'] = self.location - if len(self.access_grants) > 0: + if len(self.access_entries) > 0: resource['access'] = self._build_access_resource() - return resource - - def 
create(self, client=None): - """API call: create the dataset via a PUT request. - - See - https://cloud.google.com/bigquery/docs/reference/rest/v2/tables/insert - - :type client: :class:`~google.cloud.bigquery.client.Client` or - ``NoneType`` - :param client: the client to use. If not passed, falls back to the - ``client`` stored on the current dataset. - """ - client = self._require_client(client) - path = '/projects/%s/datasets' % (self.project,) - api_response = client._connection.api_request( - method='POST', path=path, data=self._build_resource()) - self._set_properties(api_response) - - def exists(self, client=None): - """API call: test for the existence of the dataset via a GET request - - See - https://cloud.google.com/bigquery/docs/reference/rest/v2/datasets/get - - :type client: :class:`~google.cloud.bigquery.client.Client` or - ``NoneType`` - :param client: the client to use. If not passed, falls back to the - ``client`` stored on the current dataset. - - :rtype: bool - :returns: Boolean indicating existence of the dataset. - """ - client = self._require_client(client) - - try: - client._connection.api_request(method='GET', path=self.path, - query_params={'fields': 'id'}) - except NotFound: - return False - else: - return True - - def reload(self, client=None): - """API call: refresh dataset properties via a GET request. - - See - https://cloud.google.com/bigquery/docs/reference/rest/v2/datasets/get - - :type client: :class:`~google.cloud.bigquery.client.Client` or - ``NoneType`` - :param client: the client to use. If not passed, falls back to the - ``client`` stored on the current dataset. - """ - client = self._require_client(client) - - api_response = client._connection.api_request( - method='GET', path=self.path) - self._set_properties(api_response) - - def patch(self, client=None, **kw): - """API call: update individual dataset properties via a PATCH request. - - See - https://cloud.google.com/bigquery/docs/reference/rest/v2/datasets/patch - - :type client: :class:`~google.cloud.bigquery.client.Client` or - ``NoneType`` - :param client: the client to use. If not passed, falls back to the - ``client`` stored on the current dataset. + resource['labels'] = self.labels # labels is never None - :type kw: ``dict`` - :param kw: properties to be patched. - - :raises: ValueError for invalid value types. - """ - client = self._require_client(client) - - partial = {} - - if 'default_table_expiration_ms' in kw: - value = kw['default_table_expiration_ms'] - if not isinstance(value, six.integer_types) and value is not None: - raise ValueError("Pass an integer, or None") - partial['defaultTableExpirationMs'] = value - - if 'description' in kw: - partial['description'] = kw['description'] - - if 'friendly_name' in kw: - partial['friendlyName'] = kw['friendly_name'] - - if 'location' in kw: - partial['location'] = kw['location'] - - api_response = client._connection.api_request( - method='PATCH', path=self.path, data=partial) - self._set_properties(api_response) - - def update(self, client=None): - """API call: update dataset properties via a PUT request. - - See - https://cloud.google.com/bigquery/docs/reference/rest/v2/datasets/update - - :type client: :class:`~google.cloud.bigquery.client.Client` or - ``NoneType`` - :param client: the client to use. If not passed, falls back to the - ``client`` stored on the current dataset. 
- """ - client = self._require_client(client) - api_response = client._connection.api_request( - method='PUT', path=self.path, data=self._build_resource()) - self._set_properties(api_response) - - def delete(self, client=None): - """API call: delete the dataset via a DELETE request. - - See - https://cloud.google.com/bigquery/docs/reference/rest/v2/datasets/delete - - :type client: :class:`~google.cloud.bigquery.client.Client` or - ``NoneType`` - :param client: the client to use. If not passed, falls back to the - ``client`` stored on the current dataset. - """ - client = self._require_client(client) - client._connection.api_request(method='DELETE', path=self.path) - - def list_tables(self, max_results=None, page_token=None): - """List tables for the project associated with this client. - - See - https://cloud.google.com/bigquery/docs/reference/rest/v2/tables/list - - :type max_results: int - :param max_results: (Optional) Maximum number of tables to return. - If not passed, defaults to a value set by the API. - - :type page_token: str - :param page_token: (Optional) Opaque marker for the next "page" of - datasets. If not passed, the API will return the - first page of datasets. - - :rtype: :class:`~google.api_core.page_iterator.Iterator` - :returns: Iterator of :class:`~google.cloud.bigquery.table.Table` - contained within the current dataset. - """ - path = '/projects/%s/datasets/%s/tables' % (self.project, self.name) - result = page_iterator.HTTPIterator( - client=self._client, - api_request=self._client._connection.api_request, - path=path, - item_to_value=_item_to_table, - items_key='tables', - page_token=page_token, - max_results=max_results) - result.dataset = self - return result - - def table(self, name, schema=()): - """Construct a table bound to this dataset. + return resource - :type name: str - :param name: Name of the table. + def table(self, table_id): + """Constructs a TableReference. - :type schema: list of :class:`google.cloud.bigquery.table.SchemaField` - :param schema: The table's schema + :type table_id: str + :param table_id: the ID of the table. - :rtype: :class:`google.cloud.bigquery.table.Table` - :returns: a new ``Table`` instance + :rtype: :class:`~google.cloud.bigquery.TableReference` + :returns: a TableReference for a table in this dataset. """ - return Table(name, dataset=self, schema=schema) - - -def _item_to_table(iterator, resource): - """Convert a JSON table to the native object. - - :type iterator: :class:`~google.api_core.page_iterator.Iterator` - :param iterator: The iterator that is currently in use. - - :type resource: dict - :param resource: An item to be converted to a table. - - :rtype: :class:`~google.cloud.bigquery.table.Table` - :returns: The next table in the page. - """ - return Table.from_api_repr(resource, iterator.dataset) + return TableReference(self, table_id) diff --git a/bigquery/google/cloud/bigquery/dbapi/__init__.py b/bigquery/google/cloud/bigquery/dbapi/__init__.py index 4786ef8ef5fa..6d6f70f471d9 100644 --- a/bigquery/google/cloud/bigquery/dbapi/__init__.py +++ b/bigquery/google/cloud/bigquery/dbapi/__init__.py @@ -19,11 +19,6 @@ .. _Python Database API Specification v2.0 (DB-API): https://www.python.org/dev/peps/pep-0249/ - -.. warning:: - The ``dbapi`` module is **alpha**. The implementation is not complete. It - might be changed in backward-incompatible ways and is not subject to any SLA - or deprecation policy. 
""" from google.cloud.bigquery.dbapi.connection import connect diff --git a/bigquery/google/cloud/bigquery/dbapi/_helpers.py b/bigquery/google/cloud/bigquery/dbapi/_helpers.py index a9a358cbf0f5..a2cee9c5272b 100644 --- a/bigquery/google/cloud/bigquery/dbapi/_helpers.py +++ b/bigquery/google/cloud/bigquery/dbapi/_helpers.py @@ -69,7 +69,7 @@ def to_query_parameters_list(parameters): :type parameters: Sequence[Any] :param parameters: Sequence of query parameter values. - :rtype: List[google.cloud.bigquery._helpers.AbstractQueryParameter] + :rtype: List[google.cloud.bigquery.query._AbstractQueryParameter] :returns: A list of query parameters. """ return [scalar_to_query_parameter(value) for value in parameters] @@ -81,7 +81,7 @@ def to_query_parameters_dict(parameters): :type parameters: Mapping[str, Any] :param parameters: Dictionary of query parameter values. - :rtype: List[google.cloud.bigquery._helpers.AbstractQueryParameter] + :rtype: List[google.cloud.bigquery.query._AbstractQueryParameter] :returns: A list of named query parameters. """ return [ @@ -96,7 +96,7 @@ def to_query_parameters(parameters): :type parameters: Mapping[str, Any] or Sequence[Any] :param parameters: A dictionary or sequence of query parameter values. - :rtype: List[google.cloud.bigquery._helpers.AbstractQueryParameter] + :rtype: List[google.cloud.bigquery.query._AbstractQueryParameter] :returns: A list of query parameters. """ if parameters is None: diff --git a/bigquery/google/cloud/bigquery/dbapi/cursor.py b/bigquery/google/cloud/bigquery/dbapi/cursor.py index c1683c16db79..914d2e07c553 100644 --- a/bigquery/google/cloud/bigquery/dbapi/cursor.py +++ b/bigquery/google/cloud/bigquery/dbapi/cursor.py @@ -15,10 +15,10 @@ """Cursor for the Google BigQuery DB-API.""" import collections -import uuid import six +from google.cloud.bigquery import job from google.cloud.bigquery.dbapi import _helpers from google.cloud.bigquery.dbapi import exceptions import google.cloud.exceptions @@ -52,7 +52,7 @@ def __init__(self, connection): # a single row at a time. self.arraysize = 1 self._query_data = None - self._query_results = None + self._query_job = None def close(self): """No-op.""" @@ -133,10 +133,8 @@ def execute(self, operation, parameters=None, job_id=None): is generated at random. """ self._query_data = None - self._query_results = None + self._query_job = None client = self.connection._client - if job_id is None: - job_id = str(uuid.uuid4()) # The DB-API uses the pyformat formatting, since the way BigQuery does # query parameters was not one of the standard options. Convert both @@ -146,20 +144,19 @@ def execute(self, operation, parameters=None, job_id=None): operation, parameters=parameters) query_parameters = _helpers.to_query_parameters(parameters) - query_job = client.run_async_query( - job_id, - formatted_operation, - query_parameters=query_parameters) - query_job.use_legacy_sql = False + config = job.QueryJobConfig() + config.query_parameters = query_parameters + config.use_legacy_sql = False + self._query_job = client.query( + formatted_operation, job_config=config, job_id=job_id) # Wait for the query to finish. 
try: - query_job.result() + self._query_job.result() except google.cloud.exceptions.GoogleCloudError: - raise exceptions.DatabaseError(query_job.errors) + raise exceptions.DatabaseError(self._query_job.errors) - query_results = query_job.query_results() - self._query_results = query_results + query_results = self._query_job.query_results() self._set_rowcount(query_results) self._set_description(query_results.schema) @@ -180,16 +177,24 @@ def _try_fetch(self, size=None): Mutates self to indicate that iteration has started. """ - if self._query_results is None: + if self._query_job is None: raise exceptions.InterfaceError( 'No query results: execute() must be called before fetch.') - if size is None: - size = self.arraysize + is_dml = ( + self._query_job.statement_type + and self._query_job.statement_type.upper() != 'SELECT') + if is_dml: + self._query_data = iter([]) + return if self._query_data is None: - self._query_data = iter( - self._query_results.fetch_data(max_results=size)) + client = self.connection._client + # TODO(tswast): pass in page size to list_rows based on arraysize + rows_iter = client.list_rows( + self._query_job.destination, + selected_fields=self._query_job.query_results().schema) + self._query_data = iter(rows_iter) def fetchone(self): """Fetch a single row from the results of the last ``execute*()`` call. @@ -247,7 +252,7 @@ def fetchall(self): if called before ``execute()``. """ self._try_fetch() - return [row for row in self._query_data] + return list(self._query_data) def setinputsizes(self, sizes): """No-op.""" diff --git a/bigquery/google/cloud/bigquery/external_config.py b/bigquery/google/cloud/bigquery/external_config.py new file mode 100644 index 000000000000..a40d873eea06 --- /dev/null +++ b/bigquery/google/cloud/bigquery/external_config.py @@ -0,0 +1,492 @@ +# Copyright 2017 Google Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Define classes that describe external data sources. + + These are used for both Table.externalDataConfiguration and + Job.configuration.query.tableDefinitions. 
+""" + +from __future__ import absolute_import + +import base64 +import copy + +import six + +from google.cloud.bigquery._helpers import _to_bytes +from google.cloud.bigquery._helpers import _bytes_to_json +from google.cloud.bigquery._helpers import _TypedApiResourceProperty +from google.cloud.bigquery._helpers import _ListApiResourceProperty +from google.cloud.bigquery._helpers import _int_or_none +from google.cloud.bigquery.schema import SchemaField +from google.cloud.bigquery.table import _build_schema_resource +from google.cloud.bigquery.table import _parse_schema_resource + + +class BigtableColumn(object): + """Options for a Bigtable column.""" + + def __init__(self): + self._properties = {} + + encoding = _TypedApiResourceProperty( + 'encoding', 'encoding', six.string_types) + """See + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.(key).bigtableOptions.columnFamilies.columns.encoding + https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.bigtableOptions.columnFamilies.columns.encoding + """ + + field_name = _TypedApiResourceProperty( + 'field_name', 'fieldName', six.string_types) + """See + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.(key).bigtableOptions.columnFamilies.columns.field_name + https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.bigtableOptions.columnFamilies.columns.field_name + """ + + only_read_latest = _TypedApiResourceProperty( + 'only_read_latest', 'onlyReadLatest', bool) + """See + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.(key).bigtableOptions.columnFamilies.columns.only_read_latest + https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.bigtableOptions.columnFamilies.columns.only_read_latest + """ + + qualifier_encoded = _TypedApiResourceProperty( + 'qualifier_encoded', 'qualifierEncoded', six.binary_type) + """The qualifier encoded in binary. The type is ``str`` (Python 2.x) or + ``bytes`` (Python 3.x). The module will handle base64 encoding for you. + + See + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.(key).bigtableOptions.columnFamilies.columns.qualifier_encoded + https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.bigtableOptions.columnFamilies.columns.qualifier_encoded + """ + + qualifier_string = _TypedApiResourceProperty( + 'qualifier_string', 'qualifierString', six.string_types) + """See + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.(key).bigtableOptions.columnFamilies.columns.qualifier_string + https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.bigtableOptions.columnFamilies.columns.qualifier_string + """ + + type_ = _TypedApiResourceProperty('type_', 'type', six.string_types) + """See + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.(key).bigtableOptions.columnFamilies.columns.type + https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.bigtableOptions.columnFamilies.columns.type + """ + + def to_api_repr(self): + """Build an API representation of this object. + + :rtype: dict + :returns: A dictionary in the format used by the BigQuery API. 
+ """ + config = copy.deepcopy(self._properties) + qe = config.get('qualifierEncoded') + if qe is not None: + config['qualifierEncoded'] = _bytes_to_json(qe) + return config + + @classmethod + def from_api_repr(cls, resource): + """Factory: construct a BigtableColumn given its API representation + + :type resource: dict + :param resource: + A column in the same representation as is returned from the API. + + :rtype: :class:`~google.cloud.bigquery.BigtableColumn` + :returns: Configuration parsed from ``resource``. + """ + config = cls() + config._properties = copy.deepcopy(resource) + qe = resource.get('qualifierEncoded') + if qe: + config.qualifier_encoded = base64.standard_b64decode(_to_bytes(qe)) + return config + + +class BigtableColumnFamily(object): + """Options for a Bigtable column family.""" + + def __init__(self): + self._properties = {} + + encoding = _TypedApiResourceProperty( + 'encoding', 'encoding', six.string_types) + """See + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.(key).bigtableOptions.columnFamilies.encoding + https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.bigtableOptions.columnFamilies.encoding + """ + + family_id = _TypedApiResourceProperty( + 'family_id', 'familyId', six.string_types) + """See + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.(key).bigtableOptions.columnFamilies.familyId + https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.bigtableOptions.columnFamilies.familyId + """ + + only_read_latest = _TypedApiResourceProperty( + 'only_read_latest', 'onlyReadLatest', bool) + """See + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.(key).bigtableOptions.columnFamilies.onlyReadLatest + https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.bigtableOptions.columnFamilies.onlyReadLatest + """ + + type_ = _TypedApiResourceProperty('type_', 'type', six.string_types) + """See + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.(key).bigtableOptions.columnFamilies.type + https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.bigtableOptions.columnFamilies.type + """ + + columns = _ListApiResourceProperty( + 'columns', 'columns', BigtableColumn) + """See + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.(key).bigtableOptions.columnFamilies.columns + https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.bigtableOptions.columnFamilies.columns + """ + + def to_api_repr(self): + """Build an API representation of this object. + + :rtype: dict + :returns: A dictionary in the format used by the BigQuery API. + """ + config = copy.deepcopy(self._properties) + config['columns'] = [c.to_api_repr() for c in config['columns']] + return config + + @classmethod + def from_api_repr(cls, resource): + """Factory: construct a BigtableColumnFamily given its + API representation + + :type resource: dict + :param resource: + A column family in the same representation as is returned + from the API. + + :rtype: + :class:`~google.cloud.bigquery.BigtableColumnFamily` + :returns: Configuration parsed from ``resource``. 
+ """ + config = cls() + config._properties = copy.deepcopy(resource) + config.columns = [BigtableColumn.from_api_repr(c) + for c in resource['columns']] + return config + + +class BigtableOptions(object): + """Options that describe how to treat Bigtable tables + as BigQuery tables.""" + + _SOURCE_FORMAT = 'BIGTABLE' + _RESOURCE_NAME = 'bigtableOptions' + + def __init__(self): + self._properties = {} + + ignore_unspecified_column_families = _TypedApiResourceProperty( + 'ignore_unspecified_column_families', + 'ignoreUnspecifiedColumnFamilies', bool) + """See + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.(key).bigtableOptions.ignoreUnspecifiedColumnFamilies + https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.bigtableOptions.ignoreUnspecifiedColumnFamilies + """ + + read_rowkey_as_string = _TypedApiResourceProperty( + 'read_rowkey_as_string', 'readRowkeyAsString', bool) + """See + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.(key).bigtableOptions.readRowkeyAsString + https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.bigtableOptions.readRowkeyAsString + """ + + column_families = _ListApiResourceProperty( + 'column_families', 'columnFamilies', BigtableColumnFamily) + """See + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.(key).bigtableOptions.columnFamilies + https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.bigtableOptions.columnFamilies + """ + + def to_api_repr(self): + """Build an API representation of this object. + + :rtype: dict + :returns: A dictionary in the format used by the BigQuery API. + """ + config = copy.deepcopy(self._properties) + config['columnFamilies'] = [cf.to_api_repr() + for cf in config['columnFamilies']] + return config + + @classmethod + def from_api_repr(cls, resource): + """Factory: construct a BigtableOptions given its API representation + + :type resource: dict + :param resource: + A BigtableOptions in the same representation as is returned + from the API. + + :rtype: :class:`~google.cloud.bigquery.BigtableOptions` + :returns: Configuration parsed from ``resource``. 
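# Construction sketch for the Bigtable option classes defined above; the family
# and column names are invented. ``to_api_repr`` produces the JSON-ready dict
# that ends up under ``bigtableOptions``.
from google.cloud.bigquery import (
    BigtableColumn, BigtableColumnFamily, BigtableOptions)

column = BigtableColumn()
column.field_name = 'status'
column.qualifier_string = 'st'

family = BigtableColumnFamily()
family.family_id = 'stats'
family.columns = [column]

options = BigtableOptions()
options.read_rowkey_as_string = True
options.column_families = [family]

resource = options.to_api_repr()
# e.g. {'readRowkeyAsString': True,
#       'columnFamilies': [{'familyId': 'stats',
#                           'columns': [{'fieldName': 'status',
#                                        'qualifierString': 'st'}]}]}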
+ """ + config = cls() + config._properties = copy.deepcopy(resource) + config.column_families = [BigtableColumnFamily.from_api_repr(cf) + for cf in resource['columnFamilies']] + return config + + +class CSVOptions(object): + """Options that describe how to treat CSV files as BigQuery tables.""" + + _SOURCE_FORMAT = 'CSV' + _RESOURCE_NAME = 'csvOptions' + + def __init__(self): + self._properties = {} + + allow_jagged_rows = _TypedApiResourceProperty( + 'allow_jagged_rows', 'allowJaggedRows', bool) + """See + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.(key).csvOptions.allowJaggedRows + https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.csvOptions.allowJaggedRows + """ + + allow_quoted_newlines = _TypedApiResourceProperty( + 'allow_quoted_newlines', 'allowQuotedNewlines', bool) + """See + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.(key).csvOptions.allowQuotedNewlines + https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.csvOptions.allowQuotedNewlines + """ + + encoding = _TypedApiResourceProperty( + 'encoding', 'encoding', six.string_types) + """See + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.(key).csvOptions.encoding + https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.csvOptions.encoding + """ + + field_delimiter = _TypedApiResourceProperty( + 'field_delimiter', 'fieldDelimiter', six.string_types) + """See + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.(key).csvOptions.fieldDelimiter + https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.csvOptions.fieldDelimiter + """ + + quote_character = _TypedApiResourceProperty( + 'quote_character', 'quote', six.string_types) + """See + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.(key).csvOptions.quote + https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.csvOptions.quote + """ + + skip_leading_rows = _TypedApiResourceProperty( + 'skip_leading_rows', 'skipLeadingRows', six.integer_types) + """See + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.(key).csvOptions.skipLeadingRows + https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.csvOptions.skipLeadingRows + """ + + def to_api_repr(self): + """Build an API representation of this object. + + :rtype: dict + :returns: A dictionary in the format used by the BigQuery API. + """ + config = copy.deepcopy(self._properties) + slr = config.pop('skipLeadingRows', None) + if slr is not None: + config['skipLeadingRows'] = str(slr) + return config + + @classmethod + def from_api_repr(cls, resource): + """Factory: construct a CSVOptions given its API representation + + :type resource: dict + :param resource: + A CSVOptions in the same representation as is + returned from the API. + + :rtype: :class:`~google.cloud.bigquery.CSVOptions` + :returns: Configuration parsed from ``resource``. 
+ """ + slr = resource.get('skipLeadingRows') + config = cls() + config._properties = copy.deepcopy(resource) + config.skip_leading_rows = _int_or_none(slr) + return config + + +class GoogleSheetsOptions(object): + """Options that describe how to treat Google Sheets as BigQuery tables.""" + + _SOURCE_FORMAT = 'GOOGLE_SHEETS' + _RESOURCE_NAME = 'googleSheetsOptions' + + def __init__(self): + self._properties = {} + + skip_leading_rows = _TypedApiResourceProperty( + 'skip_leading_rows', 'skipLeadingRows', six.integer_types) + """See + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.(key).googleSheetsOptions.skipLeadingRows + https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.googleSheetsOptions.skipLeadingRows + """ + + def to_api_repr(self): + """Build an API representation of this object. + + :rtype: dict + :returns: A dictionary in the format used by the BigQuery API. + """ + config = copy.deepcopy(self._properties) + slr = config.pop('skipLeadingRows', None) + if slr is not None: + config['skipLeadingRows'] = str(slr) + return config + + @classmethod + def from_api_repr(cls, resource): + """Factory: construct a GoogleSheetsOptions given its API representation + + :type resource: dict + :param resource: + An GoogleSheetsOptions in the same representation as is + returned from the API. + + :rtype: + :class:`~google.cloud.bigquery.GoogleSheetsOptions` + :returns: Configuration parsed from ``resource``. + """ + slr = resource.get('skipLeadingRows') + config = cls() + config._properties = copy.deepcopy(resource) + config.skip_leading_rows = _int_or_none(slr) + return config + + +_OPTION_CLASSES = (BigtableOptions, CSVOptions, GoogleSheetsOptions) + + +class ExternalConfig(object): + """Description of an external data source. + + :type source_format: str + :param source_format: the format of the external data. See + the ``source_format`` property on this class. 
+ """ + + def __init__(self, source_format): + self._properties = {'sourceFormat': source_format} + self._options = None + for optcls in _OPTION_CLASSES: + if source_format == optcls._SOURCE_FORMAT: + self._options = optcls() + break + + @property + def source_format(self): + """See + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.(key).sourceFormat + https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.sourceFormat + """ + return self._properties['sourceFormat'] + + @property + def options(self): + """Source-specific options.""" + return self._options + + autodetect = _TypedApiResourceProperty( + 'autodetect', 'autodetect', bool) + """See + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.(key).autodetect + https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.autodetect + """ + + compression = _TypedApiResourceProperty( + 'compression', 'compression', six.string_types) + """See + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.(key).compression + https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.compression + """ + + ignore_unknown_values = _TypedApiResourceProperty( + 'ignore_unknown_values', 'ignoreUnknownValues', bool) + """See + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.(key).ignoreUnknownValues + https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.ignoreUnknownValues + """ + + max_bad_records = _TypedApiResourceProperty( + 'max_bad_records', 'maxBadRecords', six.integer_types) + """See + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.(key).maxBadRecords + https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.maxBadRecords + """ + + source_uris = _ListApiResourceProperty( + 'source_uris', 'sourceUris', six.string_types) + """See + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.(key).sourceUris + https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.sourceUris + """ + + schema = _ListApiResourceProperty('schema', 'schema', SchemaField) + """See + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.(key).schema + https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.schema + """ + + def to_api_repr(self): + """Build an API representation of this object. + + :rtype: dict + :returns: A dictionary in the format used by the BigQuery API. + """ + config = copy.deepcopy(self._properties) + if self.schema: + config['schema'] = {'fields': _build_schema_resource(self.schema)} + if self.options is not None: + r = self.options.to_api_repr() + if r != {}: + config[self.options._RESOURCE_NAME] = r + return config + + @classmethod + def from_api_repr(cls, resource): + """Factory: construct a CSVOptions given its API representation + + :type resource: dict + :param resource: + An extract job configuration in the same representation as is + returned from the API. + + :rtype: :class:`~google.cloud.bigquery.CSVOptions` + :returns: Configuration parsed from ``resource``. 
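# Sketch tying ``ExternalConfig`` to its per-format options: describing CSV
# files in Cloud Storage so they can be queried in place. The bucket and schema
# are illustrative; attaching the config to a table or query job happens
# elsewhere and is not shown in this diff.
from google.cloud.bigquery import ExternalConfig, SchemaField

external_config = ExternalConfig('CSV')      # selects CSVOptions automatically
external_config.source_uris = ['gs://my-bucket/data/*.csv']
external_config.schema = [
    SchemaField('full_name', 'STRING'),
    SchemaField('age', 'INTEGER'),
]
external_config.options.skip_leading_rows = 1   # CSVOptions attribute
external_config.options.field_delimiter = ','

resource = external_config.to_api_repr()
# resource['sourceFormat'] == 'CSV'; the CSV settings land under 'csvOptions'
# and the schema under 'schema': {'fields': [...]}.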
+ """ + config = cls(resource['sourceFormat']) + schema = resource.get('schema') + for optcls in _OPTION_CLASSES: + opts = resource.get(optcls._RESOURCE_NAME) + if opts is not None: + config._options = optcls.from_api_repr(opts) + break + config._properties = copy.deepcopy(resource) + if schema: + config.schema = _parse_schema_resource(schema) + return config diff --git a/bigquery/google/cloud/bigquery/job.py b/bigquery/google/cloud/bigquery/job.py index 2ee535853277..696086ac1ed2 100644 --- a/bigquery/google/cloud/bigquery/job.py +++ b/bigquery/google/cloud/bigquery/job.py @@ -14,6 +14,7 @@ """Define API Jobs.""" +import copy import threading import six @@ -23,18 +24,27 @@ from google.cloud import exceptions from google.cloud.exceptions import NotFound from google.cloud._helpers import _datetime_from_microseconds -from google.cloud.bigquery.dataset import Dataset +from google.cloud.bigquery.dataset import DatasetReference +from google.cloud.bigquery.external_config import ExternalConfig +from google.cloud.bigquery.query import _AbstractQueryParameter +from google.cloud.bigquery.query import _query_param_from_api_repr +from google.cloud.bigquery.query import ArrayQueryParameter +from google.cloud.bigquery.query import ScalarQueryParameter +from google.cloud.bigquery.query import StructQueryParameter +from google.cloud.bigquery.query import UDFResource from google.cloud.bigquery.schema import SchemaField -from google.cloud.bigquery.table import Table +from google.cloud.bigquery.table import TableReference from google.cloud.bigquery.table import _build_schema_resource from google.cloud.bigquery.table import _parse_schema_resource -from google.cloud.bigquery._helpers import QueryParametersProperty -from google.cloud.bigquery._helpers import UDFResourcesProperty -from google.cloud.bigquery._helpers import _EnumProperty -from google.cloud.bigquery._helpers import _TypedProperty +from google.cloud.bigquery._helpers import _EnumApiResourceProperty +from google.cloud.bigquery._helpers import _ListApiResourceProperty +from google.cloud.bigquery._helpers import _TypedApiResourceProperty +from google.cloud.bigquery._helpers import DEFAULT_RETRY +from google.cloud.bigquery._helpers import _int_or_none _DONE_STATE = 'DONE' _STOPPED_REASON = 'stopped' +_TIMEOUT_BUFFER_SECS = 0.1 _ERROR_REASON_TO_EXCEPTION = { 'accessDenied': http_client.FORBIDDEN, @@ -80,52 +90,38 @@ def _error_result_to_exception(error_result): status_code, error_result.get('message', ''), errors=[error_result]) -class AutoDetectSchema(_TypedProperty): - """Typed Property for ``autodetect`` properties. - - :raises ValueError: on ``set`` operation if ``instance.schema`` - is already defined. 
- """ - def __set__(self, instance, value): - self._validate(value) - if instance.schema: - raise ValueError('A schema should not be already defined ' - 'when using schema auto-detection') - setattr(instance._configuration, self._backing_name, value) - - -class Compression(_EnumProperty): +class Compression(_EnumApiResourceProperty): """Pseudo-enum for ``compression`` properties.""" GZIP = 'GZIP' NONE = 'NONE' -class CreateDisposition(_EnumProperty): +class CreateDisposition(_EnumApiResourceProperty): """Pseudo-enum for ``create_disposition`` properties.""" CREATE_IF_NEEDED = 'CREATE_IF_NEEDED' CREATE_NEVER = 'CREATE_NEVER' -class DestinationFormat(_EnumProperty): +class DestinationFormat(_EnumApiResourceProperty): """Pseudo-enum for ``destination_format`` properties.""" CSV = 'CSV' NEWLINE_DELIMITED_JSON = 'NEWLINE_DELIMITED_JSON' AVRO = 'AVRO' -class Encoding(_EnumProperty): +class Encoding(_EnumApiResourceProperty): """Pseudo-enum for ``encoding`` properties.""" UTF_8 = 'UTF-8' ISO_8559_1 = 'ISO-8559-1' -class QueryPriority(_EnumProperty): +class QueryPriority(_EnumApiResourceProperty): """Pseudo-enum for ``QueryJob.priority`` property.""" INTERACTIVE = 'INTERACTIVE' BATCH = 'BATCH' -class SourceFormat(_EnumProperty): +class SourceFormat(_EnumApiResourceProperty): """Pseudo-enum for ``source_format`` properties.""" CSV = 'CSV' DATASTORE_BACKUP = 'DATASTORE_BACKUP' @@ -133,26 +129,36 @@ class SourceFormat(_EnumProperty): AVRO = 'AVRO' -class WriteDisposition(_EnumProperty): +class WriteDisposition(_EnumApiResourceProperty): """Pseudo-enum for ``write_disposition`` properties.""" WRITE_APPEND = 'WRITE_APPEND' WRITE_TRUNCATE = 'WRITE_TRUNCATE' WRITE_EMPTY = 'WRITE_EMPTY' +class AutoDetectSchema(_TypedApiResourceProperty): + """Property for ``autodetect`` properties. + + :raises ValueError: on ``set`` operation if ``instance.schema`` + is already defined. + """ + def __set__(self, instance, value): + self._validate(value) + instance._properties[self.resource_name] = value + + class _AsyncJob(google.api_core.future.polling.PollingFuture): """Base class for asynchronous jobs. - :type name: str - :param name: the name of the job + :type job_id: str + :param job_id: the job's ID in the project associated with the client. - :type client: :class:`google.cloud.bigquery.client.Client` - :param client: A client which holds credentials and project configuration - for the dataset (which requires a project). + :type client: :class:`google.cloud.bigquery.Client` + :param client: A client which holds credentials and project configuration. """ - def __init__(self, name, client): + def __init__(self, job_id, client): super(_AsyncJob, self).__init__() - self.name = name + self.job_id = job_id self._client = client self._properties = {} self._result_set = False @@ -170,12 +176,12 @@ def project(self): def _require_client(self, client): """Check client or verify over-ride. - :type client: :class:`~google.cloud.bigquery.client.Client` or + :type client: :class:`~google.cloud.bigquery.Client` or ``NoneType`` :param client: the client to use. If not passed, falls back to the ``client`` stored on the current dataset. - :rtype: :class:`google.cloud.bigquery.client.Client` + :rtype: :class:`google.cloud.bigquery.Client` :returns: The client passed in or the currently bound client. """ if client is None: @@ -196,9 +202,9 @@ def path(self): """URL path for the job's APIs. :rtype: str - :returns: the path based on project and job name. + :returns: the path based on project and job ID. 
""" - return '/projects/%s/jobs/%s' % (self.project, self.name) + return '/projects/%s/jobs/%s' % (self.project, self.job_id) @property def etag(self): @@ -266,6 +272,11 @@ def ended(self): if millis is not None: return _datetime_from_microseconds(millis * 1000.0) + def _job_statistics(self): + """Helper for job-type specific statistics-based properties.""" + statistics = self._properties.get('statistics', {}) + return statistics.get(self._JOB_TYPE, {}) + @property def error_result(self): """Error information about the job as a whole. @@ -303,6 +314,10 @@ def _scrub_local_properties(self, cleaned): """Helper: handle subclass properties in cleaned.""" pass + def _copy_configuration_properties(self, configuration): + """Helper: assign subclass configuration properties in cleaned.""" + raise NotImplementedError("Abstract") + def _set_properties(self, api_response): """Update properties from resource in body of ``api_response`` @@ -322,6 +337,8 @@ def _set_properties(self, api_response): self._properties.clear() self._properties.update(cleaned) + configuration = cleaned['configuration'][self._JOB_TYPE] + self._copy_configuration_properties(configuration) # For Future interface self._set_future_result() @@ -335,7 +352,7 @@ def _get_resource_config(cls, resource): :rtype: dict :returns: tuple (string, dict), where the first element is the - job name and the second contains job-specific configuration. + job ID and the second contains job-specific configuration. :raises: :class:`KeyError` if the resource has no identifier, or is missing the appropriate configuration. """ @@ -343,25 +360,28 @@ def _get_resource_config(cls, resource): 'jobId' not in resource['jobReference']): raise KeyError('Resource lacks required identity information: ' '["jobReference"]["jobId"]') - name = resource['jobReference']['jobId'] + job_id = resource['jobReference']['jobId'] if ('configuration' not in resource or cls._JOB_TYPE not in resource['configuration']): raise KeyError('Resource lacks required configuration: ' '["configuration"]["%s"]' % cls._JOB_TYPE) config = resource['configuration'][cls._JOB_TYPE] - return name, config + return job_id, config - def begin(self, client=None): + def _begin(self, client=None, retry=DEFAULT_RETRY): """API call: begin the job via a POST request See https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/insert - :type client: :class:`~google.cloud.bigquery.client.Client` or + :type client: :class:`~google.cloud.bigquery.Client` or ``NoneType`` :param client: the client to use. If not passed, falls back to the ``client`` stored on the current dataset. + :type retry: :class:`google.api_core.retry.Retry` + :param retry: (Optional) How to retry the RPC. + :raises: :exc:`ValueError` if the job has already begin. """ if self.state is not None: @@ -369,49 +389,59 @@ def begin(self, client=None): client = self._require_client(client) path = '/projects/%s/jobs' % (self.project,) - api_response = client._connection.api_request( + + # jobs.insert is idempotent because we ensure that every new + # job has an ID. 
+ api_response = client._call_api( + retry, method='POST', path=path, data=self._build_resource()) self._set_properties(api_response) - def exists(self, client=None): + def exists(self, client=None, retry=DEFAULT_RETRY): """API call: test for the existence of the job via a GET request See https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/get - :type client: :class:`~google.cloud.bigquery.client.Client` or + :type client: :class:`~google.cloud.bigquery.Client` or ``NoneType`` :param client: the client to use. If not passed, falls back to the ``client`` stored on the current dataset. + :type retry: :class:`google.api_core.retry.Retry` + :param retry: (Optional) How to retry the RPC. + :rtype: bool :returns: Boolean indicating existence of the job. """ client = self._require_client(client) try: - client._connection.api_request(method='GET', path=self.path, - query_params={'fields': 'id'}) + client._call_api(retry, + method='GET', path=self.path, + query_params={'fields': 'id'}) except NotFound: return False else: return True - def reload(self, client=None): + def reload(self, client=None, retry=DEFAULT_RETRY): """API call: refresh job properties via a GET request. See https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/get - :type client: :class:`~google.cloud.bigquery.client.Client` or + :type client: :class:`~google.cloud.bigquery.Client` or ``NoneType`` :param client: the client to use. If not passed, falls back to the ``client`` stored on the current dataset. + + :type retry: :class:`google.api_core.retry.Retry` + :param retry: (Optional) How to retry the RPC. """ client = self._require_client(client) - api_response = client._connection.api_request( - method='GET', path=self.path) + api_response = client._call_api(retry, method='GET', path=self.path) self._set_properties(api_response) def cancel(self, client=None): @@ -420,7 +450,7 @@ def cancel(self, client=None): See https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/cancel - :type client: :class:`~google.cloud.bigquery.client.Client` or + :type client: :class:`~google.cloud.bigquery.Client` or ``NoneType`` :param client: the client to use. If not passed, falls back to the ``client`` stored on the current dataset. @@ -461,16 +491,19 @@ def _set_future_result(self): else: self.set_result(self) - def done(self): + def done(self, retry=DEFAULT_RETRY): """Refresh the job and checks if it is complete. + :type retry: :class:`google.api_core.retry.Retry` + :param retry: (Optional) How to retry the RPC. + :rtype: bool :returns: True if the job is complete, False otherwise. """ # Do not refresh is the state is already done, as the job will not # change once complete. if self.state != _DONE_STATE: - self.reload() + self.reload(retry=retry) return self.state == _DONE_STATE def result(self, timeout=None): @@ -488,7 +521,8 @@ def result(self, timeout=None): given timeout. """ if self.state is None: - self.begin() + self._begin() + # TODO: modify PollingFuture so it can pass a retry argument to done(). return super(_AsyncJob, self).result(timeout=timeout) def cancelled(self): @@ -505,89 +539,273 @@ def cancelled(self): and self.error_result.get('reason') == _STOPPED_REASON) -class _LoadConfiguration(object): - """User-settable configuration options for load jobs. +class LoadJobConfig(object): + """Configuration options for load jobs. - Values which are ``None`` -> server defaults. + All properties in this class are optional. Values which are ``None`` -> + server defaults. 
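+
+    Example (an illustrative sketch; the client, dataset, table, and local
+    file names are assumptions)::
+
+        from google.cloud import bigquery
+
+        client = bigquery.Client()
+        config = bigquery.LoadJobConfig()
+        config.source_format = 'CSV'
+        config.skip_leading_rows = 1
+        config.autodetect = True
+
+        table_ref = client.dataset('my_dataset').table('my_table')
+        with open('data.csv', 'rb') as readable:
+            client.load_table_from_file(
+                readable, table_ref, job_config=config)  # starts a LoadJob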
""" - _allow_jagged_rows = None - _allow_quoted_newlines = None - _autodetect = None - _create_disposition = None - _encoding = None - _field_delimiter = None - _ignore_unknown_values = None - _max_bad_records = None - _null_marker = None - _quote_character = None - _skip_leading_rows = None - _source_format = None - _write_disposition = None - - -class LoadTableFromStorageJob(_AsyncJob): - """Asynchronous job for loading data into a table from CloudStorage. - :type name: str - :param name: the name of the job + def __init__(self): + self._properties = {} + self._schema = () - :type destination: :class:`google.cloud.bigquery.table.Table` - :param destination: Table into which data is to be loaded. + allow_jagged_rows = _TypedApiResourceProperty( + 'allow_jagged_rows', 'allowJaggedRows', bool) + """See + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load.allowJaggedRows + """ - :type source_uris: sequence of string - :param source_uris: URIs of one or more data files to be loaded, in - format ``gs:///``. + allow_quoted_newlines = _TypedApiResourceProperty( + 'allow_quoted_newlines', 'allowQuotedNewlines', bool) + """See + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load.allowQuotedNewlines + """ - :type client: :class:`google.cloud.bigquery.client.Client` - :param client: A client which holds credentials and project configuration - for the dataset (which requires a project). + autodetect = AutoDetectSchema('autodetect', 'autodetect', bool) + """See + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load.autodetect + """ - :type schema: list of :class:`google.cloud.bigquery.table.SchemaField` - :param schema: The job's schema + create_disposition = CreateDisposition('create_disposition', + 'createDisposition') + """See + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load.createDisposition """ - _schema = None - _JOB_TYPE = 'load' + encoding = Encoding('encoding', 'encoding') + """See + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load.encoding + """ - def __init__(self, name, destination, source_uris, client, schema=()): - super(LoadTableFromStorageJob, self).__init__(name, client) - self.destination = destination - self.source_uris = source_uris - self._configuration = _LoadConfiguration() - # Let the @property do validation. This must occur after all other - # attributes have been set. 
- self.schema = schema + field_delimiter = _TypedApiResourceProperty( + 'field_delimiter', 'fieldDelimiter', six.string_types) + """See + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load.fieldDelimiter + """ + + ignore_unknown_values = _TypedApiResourceProperty( + 'ignore_unknown_values', 'ignoreUnknownValues', bool) + """See + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load.ignoreUnknownValues + """ + + max_bad_records = _TypedApiResourceProperty( + 'max_bad_records', 'maxBadRecords', six.integer_types) + """See + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load.maxBadRecords + """ + + null_marker = _TypedApiResourceProperty( + 'null_marker', 'nullMarker', six.string_types) + """See + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load.nullMarker + """ + + quote_character = _TypedApiResourceProperty( + 'quote_character', 'quote', six.string_types) + """See + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load.quote + """ + + skip_leading_rows = _TypedApiResourceProperty( + 'skip_leading_rows', 'skipLeadingRows', six.integer_types) + """See + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load.skipLeadingRows + """ + + source_format = SourceFormat('source_format', 'sourceFormat') + """See + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load.sourceFormat + """ + + write_disposition = WriteDisposition('write_disposition', + 'writeDisposition') + """See + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load.writeDisposition + """ @property def schema(self): - """Table's schema. - - :rtype: list of :class:`SchemaField` - :returns: fields describing the schema + """See + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load.schema """ return list(self._schema) @schema.setter def schema(self, value): - """Update table's schema + if not all(isinstance(field, SchemaField) for field in value): + raise ValueError('Schema items must be fields') + self._schema = tuple(value) - :type value: list of :class:`SchemaField` - :param value: fields describing the schema + def to_api_repr(self): + """Build an API representation of the load job config. - :raises TypeError: If ``value`is not a sequence. - :raises ValueError: If any item in the sequence is not - a ``SchemaField``. + :rtype: dict + :returns: A dictionary in the format used by the BigQuery API. """ - if not value: - self._schema = () - else: - if not all(isinstance(field, SchemaField) for field in value): - raise ValueError('Schema items must be fields') - if self.autodetect: - raise ValueError( - 'Schema can not be set if `autodetect` property is True') + config = copy.deepcopy(self._properties) + if len(self.schema) > 0: + config['schema'] = {'fields': _build_schema_resource(self.schema)} + # skipLeadingRows is a string because it's defined as an int64, which + # can't be represented as a JSON number. + slr = config.get('skipLeadingRows') + if slr is not None: + config['skipLeadingRows'] = str(slr) + return config + + @classmethod + def from_api_repr(cls, resource): + """Factory: construct a job configuration given its API representation + + :type resource: dict + :param resource: + An extract job configuration in the same representation as is + returned from the API. + + :rtype: :class:`google.cloud.bigquery.ExtractJobConfig` + :returns: Configuration parsed from ``resource``. 
+        """
+        schema = resource.pop('schema', {'fields': ()})
+        slr = resource.pop('skipLeadingRows', None)
+        config = cls()
+        config._properties = copy.deepcopy(resource)
+        config.schema = _parse_schema_resource(schema)
+        config.skip_leading_rows = _int_or_none(slr)
+        return config
+
+
+class LoadJob(_AsyncJob):
+    """Asynchronous job for loading data into a table.
+
+    Can load from Google Cloud Storage URIs or from a file.
+
+    :type job_id: str
+    :param job_id: the job's ID
+
+    :type source_uris: sequence of string or ``NoneType``
+    :param source_uris:
+        URIs of one or more data files to be loaded. See
+        https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load.sourceUris
+        for supported URI formats. Pass None for jobs that load from a file.
+
+    :type destination: :class:`google.cloud.bigquery.TableReference`
+    :param destination: reference to table into which data is to be loaded.
+
+    :type client: :class:`google.cloud.bigquery.Client`
+    :param client: A client which holds credentials and project configuration
+                   for the dataset (which requires a project).
+
+    :type job_config: :class:`~google.cloud.bigquery.LoadJobConfig`
+    :param job_config:
+        (Optional) Extra configuration options for the load job.
+    """
+
+    _JOB_TYPE = 'load'
+
+    def __init__(self, job_id, source_uris, destination, client,
+                 job_config=None):
+        super(LoadJob, self).__init__(job_id, client)
+
+        if job_config is None:
+            job_config = LoadJobConfig()
+
+        self.source_uris = source_uris
+        self.destination = destination
+        self._configuration = job_config
+
+    @property
+    def allow_jagged_rows(self):
+        """See
+        :class:`~google.cloud.bigquery.LoadJobConfig.allow_jagged_rows`.
+        """
+        return self._configuration.allow_jagged_rows
+
+    @property
+    def allow_quoted_newlines(self):
+        """See
+        :class:`~google.cloud.bigquery.LoadJobConfig.allow_quoted_newlines`.
+        """
+        return self._configuration.allow_quoted_newlines
+
+    @property
+    def autodetect(self):
+        """See
+        :class:`~google.cloud.bigquery.LoadJobConfig.autodetect`.
+        """
+        return self._configuration.autodetect
+
+    @property
+    def create_disposition(self):
+        """See
+        :class:`~google.cloud.bigquery.LoadJobConfig.create_disposition`.
+        """
+        return self._configuration.create_disposition
+
+    @property
+    def encoding(self):
+        """See
+        :class:`~google.cloud.bigquery.LoadJobConfig.encoding`.
+        """
+        return self._configuration.encoding
+
+    @property
+    def field_delimiter(self):
+        """See
+        :class:`~google.cloud.bigquery.LoadJobConfig.field_delimiter`.
+        """
+        return self._configuration.field_delimiter
+
+    @property
+    def ignore_unknown_values(self):
+        """See
+        :class:`~google.cloud.bigquery.LoadJobConfig.ignore_unknown_values`.
+        """
+        return self._configuration.ignore_unknown_values
+
+    @property
+    def max_bad_records(self):
+        """See
+        :class:`~google.cloud.bigquery.LoadJobConfig.max_bad_records`.
+        """
+        return self._configuration.max_bad_records
+
+    @property
+    def null_marker(self):
+        """See
+        :class:`~google.cloud.bigquery.LoadJobConfig.null_marker`.
+        """
+        return self._configuration.null_marker
+
+    @property
+    def quote_character(self):
+        """See
+        :class:`~google.cloud.bigquery.LoadJobConfig.quote_character`.
+        """
+        return self._configuration.quote_character
+
+    @property
+    def skip_leading_rows(self):
+        """See
+        :class:`~google.cloud.bigquery.LoadJobConfig.skip_leading_rows`.
+        """
+        return self._configuration.skip_leading_rows
+
+    @property
+    def source_format(self):
+        """See
+        :class:`~google.cloud.bigquery.LoadJobConfig.source_format`.
+        """
+        return self._configuration.source_format
+
+    @property
+    def write_disposition(self):
+        """See
+        :class:`~google.cloud.bigquery.LoadJobConfig.write_disposition`.
+ """ + return self._configuration.write_disposition - self._schema = tuple(value) + @property + def schema(self): + """See + :class:`~google.cloud.bigquery.LoadJobConfig.schema`. + """ + return self._configuration.schema @property def input_file_bytes(self): @@ -633,131 +851,26 @@ def output_rows(self): if statistics is not None: return int(statistics['load']['outputRows']) - allow_jagged_rows = _TypedProperty('allow_jagged_rows', bool) - """See - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load.allowJaggedRows - """ - - allow_quoted_newlines = _TypedProperty('allow_quoted_newlines', bool) - """See - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load.allowQuotedNewlines - """ - - autodetect = AutoDetectSchema('autodetect', bool) - """See - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load.autodetect - """ - - create_disposition = CreateDisposition('create_disposition') - """See - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load.createDisposition - """ - - encoding = Encoding('encoding') - """See - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load.encoding - """ - - field_delimiter = _TypedProperty('field_delimiter', six.string_types) - """See - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load.fieldDelimiter - """ - - ignore_unknown_values = _TypedProperty('ignore_unknown_values', bool) - """See - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load.ignoreUnknownValues - """ - - max_bad_records = _TypedProperty('max_bad_records', six.integer_types) - """See - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load.maxBadRecords - """ - - null_marker = _TypedProperty('null_marker', six.string_types) - """See - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load.nullMarker - """ - - quote_character = _TypedProperty('quote_character', six.string_types) - """See - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load.quote - """ - - skip_leading_rows = _TypedProperty('skip_leading_rows', six.integer_types) - """See - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load.skipLeadingRows - """ - - source_format = SourceFormat('source_format') - """See - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load.sourceFormat - """ - - write_disposition = WriteDisposition('write_disposition') - """See - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load.writeDisposition - """ - - def _populate_config_resource(self, configuration): - """Helper for _build_resource: copy config properties to resource""" - if self.allow_jagged_rows is not None: - configuration['allowJaggedRows'] = self.allow_jagged_rows - if self.allow_quoted_newlines is not None: - configuration['allowQuotedNewlines'] = self.allow_quoted_newlines - if self.autodetect is not None: - configuration['autodetect'] = self.autodetect - if self.create_disposition is not None: - configuration['createDisposition'] = self.create_disposition - if self.encoding is not None: - configuration['encoding'] = self.encoding - if self.field_delimiter is not None: - configuration['fieldDelimiter'] = self.field_delimiter - if self.ignore_unknown_values is not None: - configuration['ignoreUnknownValues'] = self.ignore_unknown_values - if self.max_bad_records is not None: - 
configuration['maxBadRecords'] = self.max_bad_records - if self.null_marker is not None: - configuration['nullMarker'] = self.null_marker - if self.quote_character is not None: - configuration['quote'] = self.quote_character - if self.skip_leading_rows is not None: - configuration['skipLeadingRows'] = self.skip_leading_rows - if self.source_format is not None: - configuration['sourceFormat'] = self.source_format - if self.write_disposition is not None: - configuration['writeDisposition'] = self.write_disposition - def _build_resource(self): """Generate a resource for :meth:`begin`.""" - resource = { + configuration = self._configuration.to_api_repr() + if self.source_uris is not None: + configuration['sourceUris'] = self.source_uris + configuration['destinationTable'] = self.destination.to_api_repr() + + return { 'jobReference': { 'projectId': self.project, - 'jobId': self.name, + 'jobId': self.job_id, }, 'configuration': { - self._JOB_TYPE: { - 'sourceUris': self.source_uris, - 'destinationTable': { - 'projectId': self.destination.project, - 'datasetId': self.destination.dataset_name, - 'tableId': self.destination.name, - }, - }, + self._JOB_TYPE: configuration, }, } - configuration = resource['configuration'][self._JOB_TYPE] - self._populate_config_resource(configuration) - - if len(self.schema) > 0: - configuration['schema'] = { - 'fields': _build_schema_resource(self.schema)} - return resource - - def _scrub_local_properties(self, cleaned): - """Helper: handle subclass properties in cleaned.""" - schema = cleaned.pop('schema', {'fields': ()}) - self.schema = _parse_schema_resource(schema) + def _copy_configuration_properties(self, configuration): + """Helper: assign subclass configuration properties in cleaned.""" + self._configuration._properties = copy.deepcopy(configuration) @classmethod def from_api_repr(cls, resource, client): @@ -771,103 +884,149 @@ def from_api_repr(cls, resource, client): :type resource: dict :param resource: dataset job representation returned from the API - :type client: :class:`google.cloud.bigquery.client.Client` + :type client: :class:`google.cloud.bigquery.Client` :param client: Client which holds credentials and project configuration for the dataset. - :rtype: :class:`google.cloud.bigquery.job.LoadTableFromStorageJob` + :rtype: :class:`google.cloud.bigquery.LoadJob` :returns: Job parsed from ``resource``. """ - name, config = cls._get_resource_config(resource) - dest_config = config['destinationTable'] - dataset = Dataset(dest_config['datasetId'], client) - destination = Table(dest_config['tableId'], dataset) - source_urls = config.get('sourceUris', ()) - job = cls(name, destination, source_urls, client=client) + job_id, config_resource = cls._get_resource_config(resource) + config = LoadJobConfig.from_api_repr(config_resource) + dest_config = config_resource['destinationTable'] + ds_ref = DatasetReference(dest_config['projectId'], + dest_config['datasetId'],) + destination = TableReference(ds_ref, dest_config['tableId']) + # sourceUris will be absent if this is a file upload. + source_uris = config_resource.get('sourceUris') + job = cls(job_id, source_uris, destination, client, config) job._set_properties(resource) return job -class _CopyConfiguration(object): - """User-settable configuration options for copy jobs. +class CopyJobConfig(object): + """Configuration options for copy jobs. + + All properties in this class are optional. Values which are ``None`` -> + server defaults. 
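+
+    Example (an illustrative sketch; it assumes ``Client.copy_table`` is
+    available and that the referenced dataset and tables exist)::
+
+        from google.cloud import bigquery
+
+        client = bigquery.Client()
+        config = bigquery.CopyJobConfig()
+        config.write_disposition = 'WRITE_TRUNCATE'
+
+        dataset_ref = client.dataset('my_dataset')
+        source = dataset_ref.table('source_table')
+        destination = dataset_ref.table('destination_table')
+        copy_job = client.copy_table(source, destination, job_config=config)
+        copy_job.result()  # wait for the copy to complete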
+ """ + + def __init__(self): + self._properties = {} + + create_disposition = CreateDisposition('create_disposition', + 'createDisposition') + """See + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.copy.createDisposition + """ - Values which are ``None`` -> server defaults. + write_disposition = WriteDisposition('write_disposition', + 'writeDisposition') + """See + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.copy.writeDisposition """ - _create_disposition = None - _write_disposition = None + + def to_api_repr(self): + """Build an API representation of the copy job config. + + :rtype: dict + :returns: A dictionary in the format used by the BigQuery API. + """ + return copy.deepcopy(self._properties) + + @classmethod + def from_api_repr(cls, resource): + """Factory: construct a job configuration given its API representation + + :type resource: dict + :param resource: + An extract job configuration in the same representation as is + returned from the API. + + :rtype: :class:`google.cloud.bigquery.ExtractJobConfig` + :returns: Configuration parsed from ``resource``. + """ + config = cls() + config._properties = copy.deepcopy(resource) + return config class CopyJob(_AsyncJob): """Asynchronous job: copy data into a table from other tables. - :type name: str - :param name: the name of the job - - :type destination: :class:`google.cloud.bigquery.table.Table` - :param destination: Table into which data is to be loaded. + :type job_id: str + :param job_id: the job's ID, within the project belonging to ``client``. - :type sources: list of :class:`google.cloud.bigquery.table.Table` + :type sources: list of :class:`google.cloud.bigquery.TableReference` :param sources: Table into which data is to be loaded. - :type client: :class:`google.cloud.bigquery.client.Client` + :type destination: :class:`google.cloud.bigquery.TableReference` + :param destination: Table into which data is to be loaded. + + :type client: :class:`google.cloud.bigquery.Client` :param client: A client which holds credentials and project configuration for the dataset (which requires a project). - """ + :type job_config: :class:`~google.cloud.bigquery.CopyJobConfig` + :param job_config: + (Optional) Extra configuration options for the copy job. + """ _JOB_TYPE = 'copy' - def __init__(self, name, destination, sources, client): - super(CopyJob, self).__init__(name, client) + def __init__(self, job_id, sources, destination, client, job_config=None): + super(CopyJob, self).__init__(job_id, client) + + if job_config is None: + job_config = CopyJobConfig() + self.destination = destination self.sources = sources - self._configuration = _CopyConfiguration() - - create_disposition = CreateDisposition('create_disposition') - """See - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.copy.createDisposition - """ + self._configuration = job_config - write_disposition = WriteDisposition('write_disposition') - """See - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.copy.writeDisposition - """ + @property + def create_disposition(self): + """See + :class:`~google.cloud.bigquery.CopyJobConfig.create_disposition`. 
+ """ + return self._configuration.create_disposition - def _populate_config_resource(self, configuration): - """Helper for _build_resource: copy config properties to resource""" - if self.create_disposition is not None: - configuration['createDisposition'] = self.create_disposition - if self.write_disposition is not None: - configuration['writeDisposition'] = self.write_disposition + @property + def write_disposition(self): + """See + :class:`~google.cloud.bigquery.CopyJobConfig.write_disposition`. + """ + return self._configuration.write_disposition def _build_resource(self): """Generate a resource for :meth:`begin`.""" source_refs = [{ 'projectId': table.project, - 'datasetId': table.dataset_name, - 'tableId': table.name, + 'datasetId': table.dataset_id, + 'tableId': table.table_id, } for table in self.sources] - resource = { + configuration = self._configuration.to_api_repr() + configuration['sourceTables'] = source_refs + configuration['destinationTable'] = { + 'projectId': self.destination.project, + 'datasetId': self.destination.dataset_id, + 'tableId': self.destination.table_id, + } + + return { 'jobReference': { 'projectId': self.project, - 'jobId': self.name, + 'jobId': self.job_id, }, 'configuration': { - self._JOB_TYPE: { - 'sourceTables': source_refs, - 'destinationTable': { - 'projectId': self.destination.project, - 'datasetId': self.destination.dataset_name, - 'tableId': self.destination.name, - }, - }, + self._JOB_TYPE: configuration, }, } - configuration = resource['configuration'][self._JOB_TYPE] - self._populate_config_resource(configuration) - return resource + def _copy_configuration_properties(self, configuration): + """Helper: assign subclass configuration properties in cleaned.""" + self._configuration._properties = copy.deepcopy(configuration) @classmethod def from_api_repr(cls, resource, client): @@ -881,127 +1040,200 @@ def from_api_repr(cls, resource, client): :type resource: dict :param resource: dataset job representation returned from the API - :type client: :class:`google.cloud.bigquery.client.Client` + :type client: :class:`google.cloud.bigquery.Client` :param client: Client which holds credentials and project configuration for the dataset. - :rtype: :class:`google.cloud.bigquery.job.CopyJob` + :rtype: :class:`google.cloud.bigquery.CopyJob` :returns: Job parsed from ``resource``. 
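+
+        This is normally invoked indirectly; for instance (sketch; assumes
+        the client exposes a ``get_job`` helper and that the job exists)::
+
+            job = client.get_job('my-copy-job-id')  # returns a CopyJob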
""" - name, config = cls._get_resource_config(resource) - dest_config = config['destinationTable'] - dataset = Dataset(dest_config['datasetId'], client) - destination = Table(dest_config['tableId'], dataset) + job_id, config_resource = cls._get_resource_config(resource) + config = CopyJobConfig.from_api_repr(config_resource) + destination = TableReference.from_api_repr( + config_resource['destinationTable']) sources = [] - source_configs = config.get('sourceTables') + source_configs = config_resource.get('sourceTables') if source_configs is None: - single = config.get('sourceTable') + single = config_resource.get('sourceTable') if single is None: raise KeyError( "Resource missing 'sourceTables' / 'sourceTable'") source_configs = [single] for source_config in source_configs: - dataset = Dataset(source_config['datasetId'], client) - sources.append(Table(source_config['tableId'], dataset)) - job = cls(name, destination, sources, client=client) + table_ref = TableReference.from_api_repr(source_config) + sources.append(table_ref) + job = cls( + job_id, sources, destination, client=client, job_config=config) job._set_properties(resource) return job -class _ExtractConfiguration(object): - """User-settable configuration options for extract jobs. +class ExtractJobConfig(object): + """Configuration options for extract jobs. - Values which are ``None`` -> server defaults. + All properties in this class are optional. Values which are ``None`` -> + server defaults. """ - _compression = None - _destination_format = None - _field_delimiter = None - _print_header = None + def __init__(self): + self._properties = {} + + compression = Compression('compression', 'compression') + """See + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.extract.compression + """ + + destination_format = DestinationFormat( + 'destination_format', 'destinationFormat') + """See + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.extract.destinationFormat + """ + + field_delimiter = _TypedApiResourceProperty( + 'field_delimiter', 'fieldDelimiter', six.string_types) + """See + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.extract.fieldDelimiter + """ + + print_header = _TypedApiResourceProperty( + 'print_header', 'printHeader', bool) + """See + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.extract.printHeader + """ + + def to_api_repr(self): + """Build an API representation of the extract job config. -class ExtractTableToStorageJob(_AsyncJob): + :rtype: dict + :returns: A dictionary in the format used by the BigQuery API. + """ + return copy.deepcopy(self._properties) + + @classmethod + def from_api_repr(cls, resource): + """Factory: construct a job configuration given its API representation + + :type resource: dict + :param resource: + An extract job configuration in the same representation as is + returned from the API. + + :rtype: :class:`google.cloud.bigquery.ExtractJobConfig` + :returns: Configuration parsed from ``resource``. + """ + config = cls() + config._properties = copy.deepcopy(resource) + return config + + +class ExtractJob(_AsyncJob): """Asynchronous job: extract data from a table into Cloud Storage. - :type name: str - :param name: the name of the job + :type job_id: str + :param job_id: the job's ID - :type source: :class:`google.cloud.bigquery.table.Table` + :type source: :class:`google.cloud.bigquery.TableReference` :param source: Table into which data is to be loaded. 
:type destination_uris: list of string - :param destination_uris: URIs describing Cloud Storage blobs into which - extracted data will be written, in format - ``gs:///``. + :param destination_uris: + URIs describing where the extracted data will be written in Cloud + Storage, using the format ``gs:///``. - :type client: :class:`google.cloud.bigquery.client.Client` - :param client: A client which holds credentials and project configuration - for the dataset (which requires a project). + :type client: :class:`google.cloud.bigquery.Client` + :param client: + A client which holds credentials and project configuration. + + :type job_config: :class:`~google.cloud.bigquery.ExtractJobConfig` + :param job_config: + (Optional) Extra configuration options for the extract job. """ _JOB_TYPE = 'extract' - def __init__(self, name, source, destination_uris, client): - super(ExtractTableToStorageJob, self).__init__(name, client) + def __init__( + self, job_id, source, destination_uris, client, job_config=None): + super(ExtractJob, self).__init__(job_id, client) + + if job_config is None: + job_config = ExtractJobConfig() + self.source = source self.destination_uris = destination_uris - self._configuration = _ExtractConfiguration() + self._configuration = job_config - compression = Compression('compression') - """See - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.extract.compression - """ + @property + def compression(self): + """See + :class:`~google.cloud.bigquery.ExtractJobConfig.compression`. + """ + return self._configuration.compression - destination_format = DestinationFormat('destination_format') - """See - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.extract.destinationFormat - """ + @property + def destination_format(self): + """See + :class:`~google.cloud.bigquery.ExtractJobConfig.destination_format`. + """ + return self._configuration.destination_format - field_delimiter = _TypedProperty('field_delimiter', six.string_types) - """See - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.extract.fieldDelimiter - """ + @property + def field_delimiter(self): + """See + :class:`~google.cloud.bigquery.ExtractJobConfig.field_delimiter`. + """ + return self._configuration.field_delimiter - print_header = _TypedProperty('print_header', bool) - """See - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.extract.printHeader - """ + @property + def print_header(self): + """See + :class:`~google.cloud.bigquery.ExtractJobConfig.print_header`. + """ + return self._configuration.print_header + + @property + def destination_uri_file_counts(self): + """Return file counts from job statistics, if present. - def _populate_config_resource(self, configuration): - """Helper for _build_resource: copy config properties to resource""" - if self.compression is not None: - configuration['compression'] = self.compression - if self.destination_format is not None: - configuration['destinationFormat'] = self.destination_format - if self.field_delimiter is not None: - configuration['fieldDelimiter'] = self.field_delimiter - if self.print_header is not None: - configuration['printHeader'] = self.print_header + See: + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#statistics.extract.destinationUriFileCounts + + :rtype: int or None + :returns: number of DML rows affectd by the job, or None if job is not + yet complete. 
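+
+        Example (sketch; assumes ``extract_job`` is a completed
+        :class:`ExtractJob`)::
+
+            extract_job.result()  # wait for the extract to finish
+            print(extract_job.destination_uri_file_counts)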
+ """ + result = self._job_statistics().get('destinationUriFileCounts') + if result is not None: + result = int(result) + return result def _build_resource(self): """Generate a resource for :meth:`begin`.""" source_ref = { 'projectId': self.source.project, - 'datasetId': self.source.dataset_name, - 'tableId': self.source.name, + 'datasetId': self.source.dataset_id, + 'tableId': self.source.table_id, } + configuration = self._configuration.to_api_repr() + configuration['sourceTable'] = source_ref + configuration['destinationUris'] = self.destination_uris + resource = { 'jobReference': { 'projectId': self.project, - 'jobId': self.name, + 'jobId': self.job_id, }, 'configuration': { - self._JOB_TYPE: { - 'sourceTable': source_ref, - 'destinationUris': self.destination_uris, - }, + self._JOB_TYPE: configuration, }, } - configuration = resource['configuration'][self._JOB_TYPE] - self._populate_config_resource(configuration) return resource + def _copy_configuration_properties(self, configuration): + """Helper: assign subclass configuration properties in cleaned.""" + self._configuration._properties = copy.deepcopy(configuration) + @classmethod def from_api_repr(cls, resource, client): """Factory: construct a job given its API representation @@ -1014,226 +1246,406 @@ def from_api_repr(cls, resource, client): :type resource: dict :param resource: dataset job representation returned from the API - :type client: :class:`google.cloud.bigquery.client.Client` + :type client: :class:`google.cloud.bigquery.Client` :param client: Client which holds credentials and project configuration for the dataset. - :rtype: :class:`google.cloud.bigquery.job.ExtractTableToStorageJob` + :rtype: :class:`google.cloud.bigquery.ExtractJob` :returns: Job parsed from ``resource``. """ - name, config = cls._get_resource_config(resource) - source_config = config['sourceTable'] - dataset = Dataset(source_config['datasetId'], client) - source = Table(source_config['tableId'], dataset) - destination_uris = config['destinationUris'] - job = cls(name, source, destination_uris, client=client) + job_id, config_resource = cls._get_resource_config(resource) + config = ExtractJobConfig.from_api_repr(config_resource) + source_config = config_resource['sourceTable'] + dataset = DatasetReference( + source_config['projectId'], source_config['datasetId']) + source = dataset.table(source_config['tableId']) + destination_uris = config_resource['destinationUris'] + + job = cls( + job_id, source, destination_uris, client=client, job_config=config) job._set_properties(resource) return job -class _AsyncQueryConfiguration(object): - """User-settable configuration options for asynchronous query jobs. +def _from_api_repr_query_parameters(resource): + return [ + _query_param_from_api_repr(mapping) + for mapping in resource + ] - Values which are ``None`` -> server defaults. - """ - _allow_large_results = None - _create_disposition = None - _default_dataset = None - _destination = None - _flatten_results = None - _priority = None - _use_query_cache = None - _use_legacy_sql = None - _dry_run = None - _write_disposition = None - _maximum_billing_tier = None - _maximum_bytes_billed = None +def _to_api_repr_query_parameters(value): + return [ + query_parameter.to_api_repr() + for query_parameter in value + ] -class QueryJob(_AsyncJob): - """Asynchronous job: query tables. 
- :type name: str - :param name: the name of the job +def _from_api_repr_udf_resources(resource): + udf_resources = [] + for udf_mapping in resource: + for udf_type, udf_value in udf_mapping.items(): + udf_resources.append(UDFResource(udf_type, udf_value)) + return udf_resources - :type query: str - :param query: SQL query string - :type client: :class:`google.cloud.bigquery.client.Client` - :param client: A client which holds credentials and project configuration - for the dataset (which requires a project). +def _to_api_repr_udf_resources(value): + return [ + {udf_resource.udf_type: udf_resource.value} + for udf_resource in value + ] - :type udf_resources: tuple - :param udf_resources: An iterable of - :class:`google.cloud.bigquery._helpers.UDFResource` - (empty by default) - :type query_parameters: tuple - :param query_parameters: - An iterable of - :class:`google.cloud.bigquery._helpers.AbstractQueryParameter` - (empty by default) +def _from_api_repr_table_defs(resource): + return {k: ExternalConfig.from_api_repr(v) for k, v in resource.items()} + + +def _to_api_repr_table_defs(value): + return {k: ExternalConfig.to_api_repr(v) for k, v in value.items()} + + +class QueryJobConfig(object): + """Configuration options for query jobs. + + All properties in this class are optional. Values which are ``None`` -> + server defaults. """ - _JOB_TYPE = 'query' - _UDF_KEY = 'userDefinedFunctionResources' + _QUERY_PARAMETERS_KEY = 'queryParameters' + _UDF_RESOURCES_KEY = 'userDefinedFunctionResources' - def __init__(self, name, query, client, - udf_resources=(), query_parameters=()): - super(QueryJob, self).__init__(name, client) - self.query = query - self.udf_resources = udf_resources - self.query_parameters = query_parameters - self._configuration = _AsyncQueryConfiguration() - self._query_results = None + def __init__(self): + self._properties = {} + + def to_api_repr(self): + """Build an API representation of the copy job config. - allow_large_results = _TypedProperty('allow_large_results', bool) + :rtype: dict + :returns: A dictionary in the format used by the BigQuery API. + """ + resource = copy.deepcopy(self._properties) + + # Query parameters have an addition property associated with them + # to indicate if the query is using named or positional parameters. + query_parameters = resource.get(self._QUERY_PARAMETERS_KEY) + if query_parameters: + if query_parameters[0].name is None: + resource['parameterMode'] = 'POSITIONAL' + else: + resource['parameterMode'] = 'NAMED' + + for prop, convert in self._NESTED_PROPERTIES.items(): + _, to_resource = convert + nested_resource = resource.get(prop) + if nested_resource is not None: + resource[prop] = to_resource(nested_resource) + + return resource + + @classmethod + def from_api_repr(cls, resource): + """Factory: construct a job configuration given its API representation + + :type resource: dict + :param resource: + An extract job configuration in the same representation as is + returned from the API. + + :rtype: :class:`google.cloud.bigquery.ExtractJobConfig` + :returns: Configuration parsed from ``resource``. 
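+
+        Example (sketch; round-trips a minimal query configuration)::
+
+            config = QueryJobConfig.from_api_repr({'useLegacySql': False})
+            assert config.use_legacy_sql is False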
+ """ + config = cls() + config._properties = copy.deepcopy(resource) + + for prop, convert in cls._NESTED_PROPERTIES.items(): + from_resource, _ = convert + nested_resource = resource.get(prop) + if nested_resource is not None: + config._properties[prop] = from_resource(nested_resource) + + return config + + allow_large_results = _TypedApiResourceProperty( + 'allow_large_results', 'allowLargeResults', bool) """See - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.allowLargeResults + https://g.co/cloud/bigquery/docs/reference/rest/v2/jobs#configuration.query.allowLargeResults """ - create_disposition = CreateDisposition('create_disposition') + create_disposition = CreateDisposition( + 'create_disposition', 'createDisposition') """See - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.createDisposition + https://g.co/cloud/bigquery/docs/reference/rest/v2/jobs#configuration.query.createDisposition """ - default_dataset = _TypedProperty('default_dataset', Dataset) + default_dataset = _TypedApiResourceProperty( + 'default_dataset', 'defaultDataset', DatasetReference) """See - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.defaultDataset + https://g.co/cloud/bigquery/docs/reference/v2/jobs#configuration.query.defaultDataset """ - destination = _TypedProperty('destination', Table) + destination = _TypedApiResourceProperty( + 'destination', 'destinationTable', TableReference) """See - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.destinationTable + https://g.co/cloud/bigquery/docs/reference/rest/v2/jobs#configuration.query.destinationTable """ - flatten_results = _TypedProperty('flatten_results', bool) + dry_run = _TypedApiResourceProperty('dry_run', 'dryRun', bool) """See - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.flattenResults + https://g.co/cloud/bigquery/docs/reference/v2/jobs#configuration.dryRun """ - priority = QueryPriority('priority') + flatten_results = _TypedApiResourceProperty( + 'flatten_results', 'flattenResults', bool) """See - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.priority + https://g.co/cloud/bigquery/docs/reference/rest/v2/jobs#configuration.query.flattenResults """ - query_parameters = QueryParametersProperty() - - udf_resources = UDFResourcesProperty() - - use_query_cache = _TypedProperty('use_query_cache', bool) + maximum_billing_tier = _TypedApiResourceProperty( + 'maximum_billing_tier', 'maximumBillingTier', int) """See - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.useQueryCache + https://g.co/cloud/bigquery/docs/reference/rest/v2/jobs#configuration.query.maximumBillingTier """ - use_legacy_sql = _TypedProperty('use_legacy_sql', bool) + maximum_bytes_billed = _TypedApiResourceProperty( + 'maximum_bytes_billed', 'maximumBytesBilled', int) """See - https://cloud.google.com/bigquery/docs/\ - reference/v2/jobs#configuration.query.useLegacySql + https://g.co/cloud/bigquery/docs/reference/rest/v2/jobs#configuration.query.maximumBytesBilled """ - dry_run = _TypedProperty('dry_run', bool) + priority = QueryPriority('priority', 'priority') """See - https://cloud.google.com/bigquery/docs/\ - reference/rest/v2/jobs#configuration.dryRun + https://g.co/cloud/bigquery/docs/reference/rest/v2/jobs#configuration.query.priority + """ + + query_parameters = _ListApiResourceProperty( + 'query_parameters', _QUERY_PARAMETERS_KEY, 
_AbstractQueryParameter) + """ + A list of + :class:`google.cloud.bigquery.ArrayQueryParameter`, + :class:`google.cloud.bigquery.ScalarQueryParameter`, or + :class:`google.cloud.bigquery.StructQueryParameter` + (empty by default) + + See: + https://g.co/cloud/bigquery/docs/reference/rest/v2/jobs#configuration.query.queryParameters + """ + + udf_resources = _ListApiResourceProperty( + 'udf_resources', _UDF_RESOURCES_KEY, UDFResource) """ + A list of :class:`google.cloud.bigquery.UDFResource` (empty + by default) - write_disposition = WriteDisposition('write_disposition') + See: + https://g.co/cloud/bigquery/docs/reference/rest/v2/jobs#configuration.query.userDefinedFunctionResources + """ + + use_legacy_sql = _TypedApiResourceProperty( + 'use_legacy_sql', 'useLegacySql', bool) """See - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.writeDisposition + https://g.co/cloud/bigquery/docs/reference/v2/jobs#configuration.query.useLegacySql """ - maximum_billing_tier = _TypedProperty('maximum_billing_tier', int) + use_query_cache = _TypedApiResourceProperty( + 'use_query_cache', 'useQueryCache', bool) """See - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.maximumBillingTier + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.useQueryCache """ - maximum_bytes_billed = _TypedProperty('maximum_bytes_billed', int) + write_disposition = WriteDisposition( + 'write_disposition', 'writeDisposition') """See - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.maximumBytesBilled + https://g.co/cloud/bigquery/docs/reference/rest/v2/jobs#configuration.query.writeDisposition """ - def _destination_table_resource(self): - """Create a JSON resource for the destination table. 
- - Helper for :meth:`_populate_config_resource` and - :meth:`_scrub_local_properties` - """ - if self.destination is not None: - return { - 'projectId': self.destination.project, - 'datasetId': self.destination.dataset_name, - 'tableId': self.destination.name, - } - - def _populate_config_resource_booleans(self, configuration): - """Helper for _populate_config_resource.""" - if self.allow_large_results is not None: - configuration['allowLargeResults'] = self.allow_large_results - if self.flatten_results is not None: - configuration['flattenResults'] = self.flatten_results - if self.use_query_cache is not None: - configuration['useQueryCache'] = self.use_query_cache - if self.use_legacy_sql is not None: - configuration['useLegacySql'] = self.use_legacy_sql - - def _populate_config_resource(self, configuration): - """Helper for _build_resource: copy config properties to resource""" - self._populate_config_resource_booleans(configuration) - - if self.create_disposition is not None: - configuration['createDisposition'] = self.create_disposition - if self.default_dataset is not None: - configuration['defaultDataset'] = { - 'projectId': self.default_dataset.project, - 'datasetId': self.default_dataset.name, - } - if self.destination is not None: - table_res = self._destination_table_resource() - configuration['destinationTable'] = table_res - if self.priority is not None: - configuration['priority'] = self.priority - if self.write_disposition is not None: - configuration['writeDisposition'] = self.write_disposition - if self.maximum_billing_tier is not None: - configuration['maximumBillingTier'] = self.maximum_billing_tier - if self.maximum_bytes_billed is not None: - configuration['maximumBytesBilled'] = self.maximum_bytes_billed - if len(self._udf_resources) > 0: - configuration[self._UDF_KEY] = [ - {udf_resource.udf_type: udf_resource.value} - for udf_resource in self._udf_resources - ] - if len(self._query_parameters) > 0: - configuration[self._QUERY_PARAMETERS_KEY] = [ - query_parameter.to_api_repr() - for query_parameter in self._query_parameters - ] - if self._query_parameters[0].name is None: - configuration['parameterMode'] = 'POSITIONAL' - else: - configuration['parameterMode'] = 'NAMED' + table_definitions = _TypedApiResourceProperty( + 'table_definitions', 'tableDefinitions', dict) + """ + Definitions for external tables. A dictionary from table names (strings) + to :class:`google.cloud.bigquery.ExternalConfig`. + + See + https://g.co/cloud/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions + """ + + _maximum_billing_tier = None + _maximum_bytes_billed = None + + _NESTED_PROPERTIES = { + 'defaultDataset': ( + DatasetReference.from_api_repr, DatasetReference.to_api_repr), + 'destinationTable': ( + TableReference.from_api_repr, TableReference.to_api_repr), + 'maximumBytesBilled': (int, str), + 'tableDefinitions': (_from_api_repr_table_defs, + _to_api_repr_table_defs), + _QUERY_PARAMETERS_KEY: ( + _from_api_repr_query_parameters, _to_api_repr_query_parameters), + _UDF_RESOURCES_KEY: ( + _from_api_repr_udf_resources, _to_api_repr_udf_resources), + } + + +class QueryJob(_AsyncJob): + """Asynchronous job: query tables. + + :type job_id: str + :param job_id: the job's ID, within the project belonging to ``client``. + + :type query: str + :param query: SQL query string + + :type client: :class:`google.cloud.bigquery.Client` + :param client: A client which holds credentials and project configuration + for the dataset (which requires a project). 
+ + :type job_config: :class:`~google.cloud.bigquery.QueryJobConfig` + :param job_config: + (Optional) Extra configuration options for the query job. + """ + _JOB_TYPE = 'query' + _UDF_KEY = 'userDefinedFunctionResources' + + def __init__(self, job_id, query, client, job_config=None): + super(QueryJob, self).__init__(job_id, client) + + if job_config is None: + job_config = QueryJobConfig() + if job_config.use_legacy_sql is None: + job_config.use_legacy_sql = False + + self.query = query + self._configuration = job_config + self._query_results = None + self._done_timeout = None + + @property + def allow_large_results(self): + """See + :class:`~google.cloud.bigquery.QueryJobConfig.allow_large_results`. + """ + return self._configuration.allow_large_results + + @property + def create_disposition(self): + """See + :class:`~google.cloud.bigquery.QueryJobConfig.create_disposition`. + """ + return self._configuration.create_disposition + + @property + def default_dataset(self): + """See + :class:`~google.cloud.bigquery.QueryJobConfig.default_dataset`. + """ + return self._configuration.default_dataset + + @property + def destination(self): + """See + :class:`~google.cloud.bigquery.QueryJobConfig.destination`. + """ + return self._configuration.destination + + @property + def dry_run(self): + """See + :class:`~google.cloud.bigquery.QueryJobConfig.dry_run`. + """ + return self._configuration.dry_run + + @property + def flatten_results(self): + """See + :class:`~google.cloud.bigquery.QueryJobConfig.flatten_results`. + """ + return self._configuration.flatten_results + + @property + def priority(self): + """See + :class:`~google.cloud.bigquery.QueryJobConfig.priority`. + """ + return self._configuration.priority + + @property + def query_parameters(self): + """See + :class:`~google.cloud.bigquery.QueryJobConfig.query_parameters`. + """ + return self._configuration.query_parameters + + @property + def udf_resources(self): + """See + :class:`~google.cloud.bigquery.QueryJobConfig.udf_resources`. + """ + return self._configuration.udf_resources + + @property + def use_legacy_sql(self): + """See + :class:`~google.cloud.bigquery.QueryJobConfig.use_legacy_sql`. + """ + return self._configuration.use_legacy_sql + + @property + def use_query_cache(self): + """See + :class:`~google.cloud.bigquery.QueryJobConfig.use_query_cache`. + """ + return self._configuration.use_query_cache + + @property + def write_disposition(self): + """See + :class:`~google.cloud.bigquery.QueryJobConfig.write_disposition`. + """ + return self._configuration.write_disposition + + @property + def maximum_billing_tier(self): + """See + :class:`~google.cloud.bigquery.QueryJobConfig.maximum_billing_tier`. + """ + return self._configuration.maximum_billing_tier + + @property + def maximum_bytes_billed(self): + """See + :class:`~google.cloud.bigquery.QueryJobConfig.maximum_bytes_billed`. + """ + return self._configuration.maximum_bytes_billed + + @property + def table_definitions(self): + """See + :class:`~google.cloud.bigquery.QueryJobConfig.table_definitions`. 
+ """ + return self._configuration.table_definitions def _build_resource(self): """Generate a resource for :meth:`begin`.""" + configuration = self._configuration.to_api_repr() resource = { 'jobReference': { 'projectId': self.project, - 'jobId': self.name, + 'jobId': self.job_id, }, 'configuration': { - self._JOB_TYPE: { - 'query': self.query, - }, + self._JOB_TYPE: configuration, }, } - if self.dry_run is not None: - resource['configuration']['dryRun'] = self.dry_run + # The dryRun property only applies to query jobs, but it is defined at + # a level higher up. We need to remove it from the query config. + if 'dryRun' in configuration: + dry_run = configuration['dryRun'] + del configuration['dryRun'] + resource['configuration']['dryRun'] = dry_run - configuration = resource['configuration'][self._JOB_TYPE] - self._populate_config_resource(configuration) + configuration['query'] = self.query return resource @@ -1246,18 +1658,21 @@ def _scrub_local_properties(self, cleaned): the client's project. """ configuration = cleaned['configuration']['query'] - self.query = configuration['query'] - dest_remote = configuration.get('destinationTable') - if dest_remote is None: - if self.destination is not None: - del self.destination - else: - dest_local = self._destination_table_resource() - if dest_remote != dest_local: - dataset = self._client.dataset(dest_remote['datasetId']) - self.destination = dataset.table(dest_remote['tableId']) + # The dryRun property only applies to query jobs, but it is defined at + # a level higher up. We need to copy it to the query config. + self._configuration.dry_run = cleaned['configuration'].get('dryRun') + + def _copy_configuration_properties(self, configuration): + """Helper: assign subclass configuration properties in cleaned.""" + # The dryRun property only applies to query jobs, but it is defined at + # a level higher up. We need to copy it to the query config. + # It should already be correctly set by the _scrub_local_properties() + # method. + dry_run = self.dry_run + self._configuration = QueryJobConfig.from_api_repr(configuration) + self._configuration.dry_run = dry_run @classmethod def from_api_repr(cls, resource, client): @@ -1266,56 +1681,244 @@ def from_api_repr(cls, resource, client): :type resource: dict :param resource: dataset job representation returned from the API - :type client: :class:`google.cloud.bigquery.client.Client` + :type client: :class:`google.cloud.bigquery.Client` :param client: Client which holds credentials and project configuration for the dataset. - :rtype: :class:`google.cloud.bigquery.job.RunAsyncQueryJob` + :rtype: :class:`google.cloud.bigquery.RunAsyncQueryJob` :returns: Job parsed from ``resource``. """ - name, config = cls._get_resource_config(resource) + job_id, config = cls._get_resource_config(resource) query = config['query'] - job = cls(name, query, client=client) + job = cls(job_id, query, client=client) job._set_properties(resource) return job - def query_results(self): + @property + def query_plan(self): + """Return query plan from job statistics, if present. + + See: + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#statistics.query.queryPlan + + :rtype: list of :class:`QueryPlanEntry` + :returns: mappings describing the query plan, or an empty list + if the query has not yet completed. 
+ """ + plan_entries = self._job_statistics().get('queryPlan', ()) + return [QueryPlanEntry.from_api_repr(entry) for entry in plan_entries] + + @property + def total_bytes_processed(self): + """Return total bytes processed from job statistics, if present. + + See: + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#statistics.query.totalBytesProcessed + + :rtype: int or None + :returns: total bytes processed by the job, or None if job is not + yet complete. + """ + result = self._job_statistics().get('totalBytesProcessed') + if result is not None: + result = int(result) + return result + + @property + def total_bytes_billed(self): + """Return total bytes billed from job statistics, if present. + + See: + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#statistics.query.totalBytesBilled + + :rtype: int or None + :returns: total bytes processed by the job, or None if job is not + yet complete. + """ + result = self._job_statistics().get('totalBytesBilled') + if result is not None: + result = int(result) + return result + + @property + def billing_tier(self): + """Return billing tier from job statistics, if present. + + See: + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#statistics.query.billingTier + + :rtype: int or None + :returns: billing tier used by the job, or None if job is not + yet complete. + """ + return self._job_statistics().get('billingTier') + + @property + def cache_hit(self): + """Return billing tier from job statistics, if present. + + See: + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#statistics.query.cacheHit + + :rtype: bool or None + :returns: whether the query results were returned from cache, or None + if job is not yet complete. + """ + return self._job_statistics().get('cacheHit') + + @property + def num_dml_affected_rows(self): + """Return total bytes billed from job statistics, if present. + + See: + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#statistics.query.numDmlAffectedRows + + :rtype: int or None + :returns: number of DML rows affectd by the job, or None if job is not + yet complete. + """ + result = self._job_statistics().get('numDmlAffectedRows') + if result is not None: + result = int(result) + return result + + @property + def statement_type(self): + """Return statement type from job statistics, if present. + + See: + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#statistics.query.statementType + + :rtype: str or None + :returns: type of statement used by the job, or None if job is not + yet complete. + """ + return self._job_statistics().get('statementType') + + @property + def referenced_tables(self): + """Return referenced tables from job statistics, if present. + + See: + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#statistics.query.referencedTables + + :rtype: list of dict + :returns: mappings describing the query plan, or an empty list + if the query has not yet completed. 
+ """ + tables = [] + datasets_by_project_name = {} + + for table in self._job_statistics().get('referencedTables', ()): + + t_project = table['projectId'] + + ds_id = table['datasetId'] + t_dataset = datasets_by_project_name.get((t_project, ds_id)) + if t_dataset is None: + t_dataset = DatasetReference(t_project, ds_id) + datasets_by_project_name[(t_project, ds_id)] = t_dataset + + t_name = table['tableId'] + tables.append(t_dataset.table(t_name)) + + return tables + + @property + def undeclared_query_paramters(self): + """Return undeclared query parameters from job statistics, if present. + + See: + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#statistics.query.undeclaredQueryParamters + + :rtype: + list of + :class:`~google.cloud.bigquery.ArrayQueryParameter`, + :class:`~google.cloud.bigquery.ScalarQueryParameter`, or + :class:`~google.cloud.bigquery.StructQueryParameter` + :returns: undeclared parameters, or an empty list if the query has + not yet completed. + """ + parameters = [] + undeclared = self._job_statistics().get('undeclaredQueryParamters', ()) + + for parameter in undeclared: + p_type = parameter['parameterType'] + + if 'arrayType' in p_type: + klass = ArrayQueryParameter + elif 'structTypes' in p_type: + klass = StructQueryParameter + else: + klass = ScalarQueryParameter + + parameters.append(klass.from_api_repr(parameter)) + + return parameters + + def query_results(self, retry=DEFAULT_RETRY): """Construct a QueryResults instance, bound to this job. - :rtype: :class:`~google.cloud.bigquery.query.QueryResults` + :type retry: :class:`google.api_core.retry.Retry` + :param retry: (Optional) How to retry the RPC. + + :rtype: :class:`~google.cloud.bigquery.QueryResults` :returns: results instance """ if not self._query_results: - self._query_results = self._client._get_query_results(self.name) + self._query_results = self._client._get_query_results( + self.job_id, retry, project=self.project) return self._query_results - def done(self): + def done(self, retry=DEFAULT_RETRY): """Refresh the job and checks if it is complete. :rtype: bool :returns: True if the job is complete, False otherwise. """ + # Since the API to getQueryResults can hang up to the timeout value + # (default of 10 seconds), set the timeout parameter to ensure that + # the timeout from the futures API is respected. See: + # https://github.com/GoogleCloudPlatform/google-cloud-python/issues/4135 + timeout_ms = None + if self._done_timeout is not None: + # Subtract a buffer for context switching, network latency, etc. + timeout = self._done_timeout - _TIMEOUT_BUFFER_SECS + timeout = max(min(timeout, 10), 0) + self._done_timeout -= timeout + self._done_timeout = max(0, self._done_timeout) + timeout_ms = int(timeout * 1000) + # Do not refresh is the state is already done, as the job will not # change once complete. if self.state != _DONE_STATE: - self._query_results = self._client._get_query_results(self.name) + self._query_results = self._client._get_query_results( + self.job_id, retry, + project=self.project, timeout_ms=timeout_ms) # Only reload the job once we know the query is complete. # This will ensure that fields such as the destination table are # correctly populated. 
if self._query_results.complete: - self.reload() + self.reload(retry=retry) return self.state == _DONE_STATE - def result(self, timeout=None): + def _blocking_poll(self, timeout=None): + self._done_timeout = timeout + super(QueryJob, self)._blocking_poll(timeout=timeout) + + def result(self, timeout=None, retry=DEFAULT_RETRY): """Start the job and wait for it to complete and get the result. - :type timeout: int + :type timeout: float :param timeout: How long to wait for job to complete before raising a :class:`TimeoutError`. + :type retry: :class:`google.api_core.retry.Retry` + :param retry: (Optional) How to retry the call that retrieves rows. + :rtype: :class:`~google.api_core.page_iterator.Iterator` :returns: Iterator of row data :class:`tuple`s. During each page, the @@ -1330,4 +1933,153 @@ def result(self, timeout=None): """ super(QueryJob, self).result(timeout=timeout) # Return an iterator instead of returning the job. - return self.query_results().fetch_data() + schema = self.query_results().schema + dest_table = self.destination + return self._client.list_rows(dest_table, selected_fields=schema, + retry=retry) + + +class QueryPlanEntryStep(object): + """Map a single step in a query plan entry. + + :type kind: str + :param kind: step type + + :type substeps: + :param substeps: names of substeps + """ + def __init__(self, kind, substeps): + self.kind = kind + self.substeps = list(substeps) + + @classmethod + def from_api_repr(cls, resource): + """Factory: construct instance from the JSON repr. + + :type resource: dict + :param resource: JSON representation of the entry + + :rtype: :class:`QueryPlanEntryStep` + :return: new instance built from the resource + """ + return cls( + kind=resource.get('kind'), + substeps=resource.get('substeps', ()), + ) + + def __eq__(self, other): + if not isinstance(other, self.__class__): + return NotImplemented + return self.kind == other.kind and self.substeps == other.substeps + + +class QueryPlanEntry(object): + """Map a single entry in a query plan. 
+
+    :type name: str
+    :param name: name of the entry
+
+    :type entry_id: int
+    :param entry_id: ID of the entry
+
+    :type wait_ratio_avg: float
+    :param wait_ratio_avg: average wait ratio
+
+    :type wait_ratio_max: float
+    :param wait_ratio_max: maximum wait ratio
+
+    :type read_ratio_avg: float
+    :param read_ratio_avg: average read ratio
+
+    :type read_ratio_max: float
+    :param read_ratio_max: maximum read ratio
+
+    :type compute_ratio_avg: float
+    :param compute_ratio_avg: average compute ratio
+
+    :type compute_ratio_max: float
+    :param compute_ratio_max: maximum compute ratio
+
+    :type write_ratio_avg: float
+    :param write_ratio_avg: average write ratio
+
+    :type write_ratio_max: float
+    :param write_ratio_max: maximum write ratio
+
+    :type records_read: int
+    :param records_read: number of records read
+
+    :type records_written: int
+    :param records_written: number of records written
+
+    :type status: str
+    :param status: entry status
+
+    :type steps: List(QueryPlanEntryStep)
+    :param steps: steps in the entry
+    """
+    def __init__(self,
+                 name,
+                 entry_id,
+                 wait_ratio_avg,
+                 wait_ratio_max,
+                 read_ratio_avg,
+                 read_ratio_max,
+                 compute_ratio_avg,
+                 compute_ratio_max,
+                 write_ratio_avg,
+                 write_ratio_max,
+                 records_read,
+                 records_written,
+                 status,
+                 steps):
+        self.name = name
+        self.entry_id = entry_id
+        self.wait_ratio_avg = wait_ratio_avg
+        self.wait_ratio_max = wait_ratio_max
+        self.read_ratio_avg = read_ratio_avg
+        self.read_ratio_max = read_ratio_max
+        self.compute_ratio_avg = compute_ratio_avg
+        self.compute_ratio_max = compute_ratio_max
+        self.write_ratio_avg = write_ratio_avg
+        self.write_ratio_max = write_ratio_max
+        self.records_read = records_read
+        self.records_written = records_written
+        self.status = status
+        self.steps = steps
+
+    @classmethod
+    def from_api_repr(cls, resource):
+        """Factory: construct instance from the JSON repr.
+
+        :type resource: dict
+        :param resource: JSON representation of the entry
+
+        :rtype: :class:`QueryPlanEntry`
+        :return: new instance built from the resource
+        """
+        records_read = resource.get('recordsRead')
+        if records_read is not None:
+            records_read = int(records_read)
+
+        records_written = resource.get('recordsWritten')
+        if records_written is not None:
+            records_written = int(records_written)
+
+        return cls(
+            name=resource.get('name'),
+            entry_id=resource.get('id'),
+            wait_ratio_avg=resource.get('waitRatioAvg'),
+            wait_ratio_max=resource.get('waitRatioMax'),
+            read_ratio_avg=resource.get('readRatioAvg'),
+            read_ratio_max=resource.get('readRatioMax'),
+            compute_ratio_avg=resource.get('computeRatioAvg'),
+            compute_ratio_max=resource.get('computeRatioMax'),
+            write_ratio_avg=resource.get('writeRatioAvg'),
+            write_ratio_max=resource.get('writeRatioMax'),
+            records_read=records_read,
+            records_written=records_written,
+            status=resource.get('status'),
+            steps=[QueryPlanEntryStep.from_api_repr(step)
+                   for step in resource.get('steps', ())],
+        )
diff --git a/bigquery/google/cloud/bigquery/query.py b/bigquery/google/cloud/bigquery/query.py
index b99cb3c5630e..0b8808dd44a9 100644
--- a/bigquery/google/cloud/bigquery/query.py
+++ b/bigquery/google/cloud/bigquery/query.py
@@ -12,121 +12,477 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
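Before moving on to the query helpers, a short sketch of how the timeout budgeting in ``done()`` above surfaces to callers of ``result()``; the query is a placeholder, and the ``TimeoutError`` comes from the standard futures API used by the job base class:

.. code:: python

    import concurrent.futures

    from google.cloud import bigquery

    client = bigquery.Client()
    job = client.query('SELECT 1')

    try:
        rows = job.result(timeout=30)  # seconds, shared across the polling calls
    except concurrent.futures.TimeoutError:
        print('Query still running after 30s; job_id=%s' % job.job_id)
    else:
        for row in rows:
            print(row)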
-"""Define API Queries.""" +"""BigQuery query processing.""" -import six +from collections import OrderedDict +import copy -from google.api_core import page_iterator -from google.cloud.bigquery._helpers import _TypedProperty -from google.cloud.bigquery._helpers import _rows_from_json -from google.cloud.bigquery.dataset import Dataset -from google.cloud.bigquery.job import QueryJob from google.cloud.bigquery.table import _parse_schema_resource -from google.cloud.bigquery._helpers import QueryParametersProperty -from google.cloud.bigquery._helpers import UDFResourcesProperty -from google.cloud.bigquery._helpers import _item_to_row -from google.cloud.bigquery._helpers import _rows_page_start +from google.cloud.bigquery._helpers import _rows_from_json +from google.cloud.bigquery._helpers import _QUERY_PARAMS_FROM_JSON +from google.cloud.bigquery._helpers import _SCALAR_VALUE_TO_JSON_PARAM + +class UDFResource(object): + """Describe a single user-defined function (UDF) resource. -class _SyncQueryConfiguration(object): - """User-settable configuration options for synchronous query jobs. + :type udf_type: str + :param udf_type: the type of the resource ('inlineCode' or 'resourceUri') - Values which are ``None`` -> server defaults. + :type value: str + :param value: the inline code or resource URI. + + See + https://cloud.google.com/bigquery/user-defined-functions#api """ - _default_dataset = None - _dry_run = None - _max_results = None - _timeout_ms = None - _preserve_nulls = None - _use_query_cache = None - _use_legacy_sql = None + def __init__(self, udf_type, value): + self.udf_type = udf_type + self.value = value + def __eq__(self, other): + if not isinstance(other, UDFResource): + return NotImplemented + return( + self.udf_type == other.udf_type and + self.value == other.value) -class QueryResults(object): - """Synchronous job: query tables. + def __ne__(self, other): + return not self == other + + +class _AbstractQueryParameter(object): + """Base class for named / positional query parameters. + """ + @classmethod + def from_api_repr(cls, resource): + """Factory: construct parameter from JSON resource. + + :type resource: dict + :param resource: JSON mapping of parameter + + :rtype: :class:`~google.cloud.bigquery.ScalarQueryParameter` + """ + raise NotImplementedError + + def to_api_repr(self): + """Construct JSON API representation for the parameter. + + :rtype: dict + """ + raise NotImplementedError - :type query: str - :param query: SQL query string - :type client: :class:`google.cloud.bigquery.client.Client` - :param client: A client which holds credentials and project configuration - for the dataset (which requires a project). +class ScalarQueryParameter(_AbstractQueryParameter): + """Named / positional query parameters for scalar values. - :type udf_resources: tuple - :param udf_resources: An iterable of - :class:`google.cloud.bigquery.job.UDFResource` - (empty by default) + :type name: str or None + :param name: Parameter name, used via ``@foo`` syntax. If None, the + parameter can only be addressed via position (``?``). - :type query_parameters: tuple - :param query_parameters: - An iterable of - :class:`google.cloud.bigquery._helpers.AbstractQueryParameter` - (empty by default) + :type type_: str + :param type_: name of parameter type. One of 'STRING', 'INT64', + 'FLOAT64', 'BOOL', 'TIMESTAMP', 'DATETIME', or 'DATE'. + + :type value: str, int, float, bool, :class:`datetime.datetime`, or + :class:`datetime.date`. + :param value: the scalar parameter value. 
""" + def __init__(self, name, type_, value): + self.name = name + self.type_ = type_ + self.value = value - _UDF_KEY = 'userDefinedFunctionResources' - _QUERY_PARAMETERS_KEY = 'queryParameters' + @classmethod + def positional(cls, type_, value): + """Factory for positional paramater. - def __init__(self, query, client, udf_resources=(), query_parameters=()): - self._client = client - self._properties = {} - self.query = query - self._configuration = _SyncQueryConfiguration() - self.udf_resources = udf_resources - self.query_parameters = query_parameters - self._job = None + :type type_: str + :param type_: + name of parameter type. One of 'STRING', 'INT64', + 'FLOAT64', 'BOOL', 'TIMESTAMP', 'DATETIME', or 'DATE'. + + :type value: str, int, float, bool, :class:`datetime.datetime`, or + :class:`datetime.date`. + :param value: the scalar parameter value. + + :rtype: :class:`~google.cloud.bigquery.ScalarQueryParameter` + :returns: instance without name + """ + return cls(None, type_, value) @classmethod - def from_api_repr(cls, api_response, client): - instance = cls(None, client) - instance._set_properties(api_response) - return instance + def from_api_repr(cls, resource): + """Factory: construct parameter from JSON resource. + + :type resource: dict + :param resource: JSON mapping of parameter + + :rtype: :class:`~google.cloud.bigquery.ScalarQueryParameter` + :returns: instance + """ + name = resource.get('name') + type_ = resource['parameterType']['type'] + value = resource['parameterValue']['value'] + converted = _QUERY_PARAMS_FROM_JSON[type_](value, None) + return cls(name, type_, converted) + + def to_api_repr(self): + """Construct JSON API representation for the parameter. + + :rtype: dict + :returns: JSON mapping + """ + value = self.value + converter = _SCALAR_VALUE_TO_JSON_PARAM.get(self.type_) + if converter is not None: + value = converter(value) + resource = { + 'parameterType': { + 'type': self.type_, + }, + 'parameterValue': { + 'value': value, + }, + } + if self.name is not None: + resource['name'] = self.name + return resource + + def _key(self): + """A tuple key that uniquely describes this field. + + Used to compute this instance's hashcode and evaluate equality. + + Returns: + tuple: The contents of this + :class:`~google.cloud.bigquery.ScalarQueryParameter`. + """ + return ( + self.name, + self.type_.upper(), + self.value, + ) + + def __eq__(self, other): + if not isinstance(other, ScalarQueryParameter): + return NotImplemented + return self._key() == other._key() + + def __ne__(self, other): + return not self == other + + def __repr__(self): + return 'ScalarQueryParameter{}'.format(self._key()) + + +class ArrayQueryParameter(_AbstractQueryParameter): + """Named / positional query parameters for array values. + + :type name: str or None + :param name: Parameter name, used via ``@foo`` syntax. If None, the + parameter can only be addressed via position (``?``). + + :type array_type: str + :param array_type: + name of type of array elements. One of `'STRING'`, `'INT64'`, + `'FLOAT64'`, `'BOOL'`, `'TIMESTAMP'`, or `'DATE'`. + + :type values: list of appropriate scalar type. + :param values: the parameter array values. + """ + def __init__(self, name, array_type, values): + self.name = name + self.array_type = array_type + self.values = values + + @classmethod + def positional(cls, array_type, values): + """Factory for positional parameters. + + :type array_type: str + :param array_type: + name of type of array elements. 
One of `'STRING'`, `'INT64'`, + `'FLOAT64'`, `'BOOL'`, `'TIMESTAMP'`, or `'DATE'`. + + :type values: list of appropriate scalar type + :param values: the parameter array values. + + :rtype: :class:`~google.cloud.bigquery.ArrayQueryParameter` + :returns: instance without name + """ + return cls(None, array_type, values) + + @classmethod + def _from_api_repr_struct(cls, resource): + name = resource.get('name') + converted = [] + # We need to flatten the array to use the StructQueryParameter + # parse code. + resource_template = { + # The arrayType includes all the types of the fields of the STRUCT + 'parameterType': resource['parameterType']['arrayType'] + } + for array_value in resource['parameterValue']['arrayValues']: + struct_resource = copy.deepcopy(resource_template) + struct_resource['parameterValue'] = array_value + struct_value = StructQueryParameter.from_api_repr(struct_resource) + converted.append(struct_value) + return cls(name, 'STRUCT', converted) + + @classmethod + def _from_api_repr_scalar(cls, resource): + name = resource.get('name') + array_type = resource['parameterType']['arrayType']['type'] + values = [ + value['value'] + for value + in resource['parameterValue']['arrayValues']] + converted = [ + _QUERY_PARAMS_FROM_JSON[array_type](value, None) + for value in values + ] + return cls(name, array_type, converted) + + @classmethod + def from_api_repr(cls, resource): + """Factory: construct parameter from JSON resource. + + :type resource: dict + :param resource: JSON mapping of parameter + + :rtype: :class:`~google.cloud.bigquery.ArrayQueryParameter` + :returns: instance + """ + array_type = resource['parameterType']['arrayType']['type'] + if array_type == 'STRUCT': + return cls._from_api_repr_struct(resource) + return cls._from_api_repr_scalar(resource) + + def to_api_repr(self): + """Construct JSON API representation for the parameter. + + :rtype: dict + :returns: JSON mapping + """ + values = self.values + if self.array_type == 'RECORD' or self.array_type == 'STRUCT': + reprs = [value.to_api_repr() for value in values] + a_type = reprs[0]['parameterType'] + a_values = [repr_['parameterValue'] for repr_ in reprs] + else: + a_type = {'type': self.array_type} + converter = _SCALAR_VALUE_TO_JSON_PARAM.get(self.array_type) + if converter is not None: + values = [converter(value) for value in values] + a_values = [{'value': value} for value in values] + resource = { + 'parameterType': { + 'type': 'ARRAY', + 'arrayType': a_type, + }, + 'parameterValue': { + 'arrayValues': a_values, + }, + } + if self.name is not None: + resource['name'] = self.name + return resource + + def _key(self): + """A tuple key that uniquely describes this field. + + Used to compute this instance's hashcode and evaluate equality. + + Returns: + tuple: The contents of this + :class:`~google.cloud.bigquery.ArrayQueryParameter`. + """ + return ( + self.name, + self.array_type.upper(), + self.values, + ) + + def __eq__(self, other): + if not isinstance(other, ArrayQueryParameter): + return NotImplemented + return self._key() == other._key() + + def __ne__(self, other): + return not self == other + + def __repr__(self): + return 'ArrayQueryParameter{}'.format(self._key()) + + +class StructQueryParameter(_AbstractQueryParameter): + """Named / positional query parameters for struct values. + + :type name: str or None + :param name: Parameter name, used via ``@foo`` syntax. If None, the + parameter can only be addressed via position (``?``). 
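The ``ArrayQueryParameter`` class defined just above serializes its values the same way; a small sketch, with an illustrative parameter name and values (the commented output is approximate):

.. code:: python

    from google.cloud.bigquery import ArrayQueryParameter

    ages = ArrayQueryParameter('ages', 'INT64', [18, 21, 65])

    ages.to_api_repr()
    # Roughly: {'name': 'ages',
    #           'parameterType': {'type': 'ARRAY', 'arrayType': {'type': 'INT64'}},
    #           'parameterValue': {'arrayValues': [{'value': '18'},
    #                                              {'value': '21'},
    #                                              {'value': '65'}]}}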
+ + :type sub_params: + tuple of :class:`~google.cloud.bigquery.ScalarQueryParameter`, + :class:`~google.cloud.bigquery.ArrayQueryParameter`, or + :class:`~google.cloud.bigquery.StructQueryParameter` + :param sub_params: the sub-parameters for the struct + """ + def __init__(self, name, *sub_params): + self.name = name + types = self.struct_types = OrderedDict() + values = self.struct_values = {} + for sub in sub_params: + if isinstance(sub, self.__class__): + types[sub.name] = 'STRUCT' + values[sub.name] = sub + elif isinstance(sub, ArrayQueryParameter): + types[sub.name] = 'ARRAY' + values[sub.name] = sub + else: + types[sub.name] = sub.type_ + values[sub.name] = sub.value @classmethod - def from_query_job(cls, job): - """Factory: construct from an existing job. + def positional(cls, *sub_params): + """Factory for positional parameters. - :type job: :class:`~google.cloud.bigquery.job.QueryJob` - :param job: existing job + :type sub_params: + tuple of :class:`~google.cloud.bigquery.ScalarQueryParameter`, + :class:`~google.cloud.bigquery.ArrayQueryParameter`, or + :class:`~google.cloud.bigquery.StructQueryParameter` + :param sub_params: the sub-parameters for the struct - :rtype: :class:`QueryResults` - :returns: the instance, bound to the job + :rtype: :class:`~google.cloud.bigquery.StructQueryParameter` + :returns: instance without name """ - instance = cls(job.query, job._client, job.udf_resources) - instance._job = job - job_ref = instance._properties.setdefault('jobReference', {}) - job_ref['jobId'] = job.name - if job.default_dataset is not None: - instance.default_dataset = job.default_dataset - if job.use_query_cache is not None: - instance.use_query_cache = job.use_query_cache - if job.use_legacy_sql is not None: - instance.use_legacy_sql = job.use_legacy_sql + return cls(None, *sub_params) + + @classmethod + def from_api_repr(cls, resource): + """Factory: construct parameter from JSON resource. + + :type resource: dict + :param resource: JSON mapping of parameter + + :rtype: :class:`~google.cloud.bigquery.StructQueryParameter` + :returns: instance + """ + name = resource.get('name') + instance = cls(name) + type_resources = {} + types = instance.struct_types + for item in resource['parameterType']['structTypes']: + types[item['name']] = item['type']['type'] + type_resources[item['name']] = item['type'] + struct_values = resource['parameterValue']['structValues'] + for key, value in struct_values.items(): + type_ = types[key] + converted = None + if type_ == 'STRUCT': + struct_resource = { + 'name': key, + 'parameterType': type_resources[key], + 'parameterValue': value, + } + converted = StructQueryParameter.from_api_repr(struct_resource) + elif type_ == 'ARRAY': + struct_resource = { + 'name': key, + 'parameterType': type_resources[key], + 'parameterValue': value, + } + converted = ArrayQueryParameter.from_api_repr(struct_resource) + else: + value = value['value'] + converted = _QUERY_PARAMS_FROM_JSON[type_](value, None) + instance.struct_values[key] = converted return instance - @property - def project(self): - """Project bound to the job. + def to_api_repr(self): + """Construct JSON API representation for the parameter. - :rtype: str - :returns: the project (derived from the client). 
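A hedged sketch of composing the struct parameter described above from scalar sub-parameters; the names and values are illustrative only:

.. code:: python

    from google.cloud.bigquery import ScalarQueryParameter
    from google.cloud.bigquery import StructQueryParameter

    person = StructQueryParameter(
        'person',
        ScalarQueryParameter('name', 'STRING', 'Tim'),
        ScalarQueryParameter('age', 'INT64', 99),
    )
    person.struct_types
    # OrderedDict([('name', 'STRING'), ('age', 'INT64')])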
+ :rtype: dict + :returns: JSON mapping """ - return self._client.project + s_types = {} + values = {} + for name, value in self.struct_values.items(): + type_ = self.struct_types[name] + if type_ in ('STRUCT', 'ARRAY'): + repr_ = value.to_api_repr() + s_types[name] = {'name': name, 'type': repr_['parameterType']} + values[name] = repr_['parameterValue'] + else: + s_types[name] = {'name': name, 'type': {'type': type_}} + converter = _SCALAR_VALUE_TO_JSON_PARAM.get(type_) + if converter is not None: + value = converter(value) + values[name] = {'value': value} + + resource = { + 'parameterType': { + 'type': 'STRUCT', + 'structTypes': [s_types[key] for key in self.struct_types], + }, + 'parameterValue': { + 'structValues': values, + }, + } + if self.name is not None: + resource['name'] = self.name + return resource + + def _key(self): + """A tuple key that uniquely describes this field. + + Used to compute this instance's hashcode and evaluate equality. + + Returns: + tuple: The contents of this + :class:`~google.cloud.biquery.ArrayQueryParameter`. + """ + return ( + self.name, + self.struct_types, + self.struct_values, + ) + + def __eq__(self, other): + if not isinstance(other, StructQueryParameter): + return NotImplemented + return self._key() == other._key() + + def __ne__(self, other): + return not self == other + + def __repr__(self): + return 'StructQueryParameter{}'.format(self._key()) + + +class QueryResults(object): + """Results of a query. + + See: + https://g.co/cloud/bigquery/docs/reference/rest/v2/jobs/getQueryResults + """ - def _require_client(self, client): - """Check client or verify over-ride. + def __init__(self, properties): + self._properties = {} + self._set_properties(properties) - :type client: :class:`~google.cloud.bigquery.client.Client` or - ``NoneType`` - :param client: the client to use. If not passed, falls back to the - ``client`` stored on the current dataset. + @classmethod + def from_api_repr(cls, api_response): + return cls(api_response) + + @property + def project(self): + """Project bound to the query job. - :rtype: :class:`google.cloud.bigquery.client.Client` - :returns: The client passed in or the currently bound client. + :rtype: str + :returns: the project that the query job is associated with. """ - if client is None: - client = self._client - return client + return self._properties.get('jobReference', {}).get('projectId') @property def cache_hit(self): @@ -168,33 +524,17 @@ def errors(self): return self._properties.get('errors') @property - def name(self): - """Job name, generated by the back-end. + def job_id(self): + """Job ID of the query job these results are from. See https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/query#jobReference - :rtype: list of mapping, or ``NoneType`` - :returns: Mappings describing errors generated on the server (None - until set by the server). + :rtype: string + :returns: Job ID of the query job. """ return self._properties.get('jobReference', {}).get('jobId') - @property - def job(self): - """Job instance used to run the query. - - :rtype: :class:`google.cloud.bigquery.job.QueryJob`, or ``NoneType`` - :returns: Job instance used to run the query (None until - ``jobReference`` property is set by the server). - """ - if self._job is None: - job_ref = self._properties.get('jobReference') - if job_ref is not None: - self._job = QueryJob(job_ref['jobId'], self.query, - self._client) - return self._job - @property def page_token(self): """Token for fetching next bach of results. 
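The parameter classes above are attached to a job through ``QueryJobConfig.query_parameters``; a minimal end-to-end sketch, assuming an authenticated client and the public ``samples.shakespeare`` dataset:

.. code:: python

    from google.cloud import bigquery
    from google.cloud.bigquery import QueryJobConfig
    from google.cloud.bigquery import ScalarQueryParameter

    client = bigquery.Client()

    config = QueryJobConfig()
    config.query_parameters = [
        ScalarQueryParameter('corpus', 'STRING', 'hamlet'),
    ]

    query = (
        'SELECT word, word_count '
        'FROM `bigquery-public-data.samples.shakespeare` '
        'WHERE corpus = @corpus '
        'ORDER BY word_count DESC LIMIT 10')
    job = client.query(query, job_config=config)
    for row in job.result():
        print('%s: %d' % (row.word, row.word_count))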
@@ -256,7 +596,7 @@ def rows(self): See https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/query#rows - :rtype: list of tuples of row values, or ``NoneType`` + :rtype: list of :class:`~google.cloud.bigquery.Row` :returns: fields describing the schema (None until set by the server). """ return _rows_from_json(self._properties.get('rows', ()), self.schema) @@ -273,209 +613,30 @@ def schema(self): """ return _parse_schema_resource(self._properties.get('schema', {})) - default_dataset = _TypedProperty('default_dataset', Dataset) - """See - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/query#defaultDataset - """ - - dry_run = _TypedProperty('dry_run', bool) - """See - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/query#dryRun - """ - - max_results = _TypedProperty('max_results', six.integer_types) - """See - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/query#maxResults - """ - - preserve_nulls = _TypedProperty('preserve_nulls', bool) - """See - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/query#preserveNulls - """ - - query_parameters = QueryParametersProperty() - - timeout_ms = _TypedProperty('timeout_ms', six.integer_types) - """See - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/query#timeoutMs - """ - - udf_resources = UDFResourcesProperty() - - use_query_cache = _TypedProperty('use_query_cache', bool) - """See - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/query#useQueryCache - """ - - use_legacy_sql = _TypedProperty('use_legacy_sql', bool) - """See - https://cloud.google.com/bigquery/docs/\ - reference/v2/jobs/query#useLegacySql - """ - def _set_properties(self, api_response): """Update properties from resource in body of ``api_response`` :type api_response: dict :param api_response: response returned from an API call """ - self._properties.clear() - self._properties.update(api_response) - - def _build_resource(self): - """Generate a resource for :meth:`begin`.""" - resource = {'query': self.query} - - if self.default_dataset is not None: - resource['defaultDataset'] = { - 'projectId': self.project, - 'datasetId': self.default_dataset.name, - } - - if self.max_results is not None: - resource['maxResults'] = self.max_results - - if self.preserve_nulls is not None: - resource['preserveNulls'] = self.preserve_nulls - - if self.timeout_ms is not None: - resource['timeoutMs'] = self.timeout_ms - - if self.use_query_cache is not None: - resource['useQueryCache'] = self.use_query_cache - - if self.use_legacy_sql is not None: - resource['useLegacySql'] = self.use_legacy_sql - - if self.dry_run is not None: - resource['dryRun'] = self.dry_run - - if len(self._udf_resources) > 0: - resource[self._UDF_KEY] = [ - {udf_resource.udf_type: udf_resource.value} - for udf_resource in self._udf_resources - ] - if len(self._query_parameters) > 0: - resource[self._QUERY_PARAMETERS_KEY] = [ - query_parameter.to_api_repr() - for query_parameter in self._query_parameters - ] - if self._query_parameters[0].name is None: - resource['parameterMode'] = 'POSITIONAL' - else: - resource['parameterMode'] = 'NAMED' - - return resource - - def run(self, client=None): - """API call: run the query via a POST request - - See - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/query - - :type client: :class:`~google.cloud.bigquery.client.Client` or - ``NoneType`` - :param client: the client to use. If not passed, falls back to the - ``client`` stored on the current dataset. 
- """ - if self.job is not None: - raise ValueError("Query job is already running.") - - client = self._require_client(client) - path = '/projects/%s/queries' % (self.project,) - api_response = client._connection.api_request( - method='POST', path=path, data=self._build_resource()) - self._set_properties(api_response) - - def fetch_data(self, max_results=None, page_token=None, start_index=None, - timeout_ms=None, client=None): - """API call: fetch a page of query result data via a GET request - - See - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/getQueryResults - - :type max_results: int - :param max_results: (Optional) maximum number of rows to return. - - :type page_token: str - :param page_token: - (Optional) token representing a cursor into the table's rows. - - :type start_index: int - :param start_index: (Optional) zero-based index of starting row - - :type timeout_ms: int - :param timeout_ms: - (Optional) How long to wait for the query to complete, in - milliseconds, before the request times out and returns. Note that - this is only a timeout for the request, not the query. If the query - takes longer to run than the timeout value, the call returns - without any results and with the 'jobComplete' flag set to false. - You can call GetQueryResults() to wait for the query to complete - and read the results. The default value is 10000 milliseconds (10 - seconds). - - :type client: :class:`~google.cloud.bigquery.client.Client` or - ``NoneType`` - :param client: the client to use. If not passed, falls back to the - ``client`` stored on the current dataset. - - :rtype: :class:`~google.api_core.page_iterator.Iterator` - :returns: Iterator of row data :class:`tuple`s. During each page, the - iterator will have the ``total_rows`` attribute set, - which counts the total number of rows **in the result - set** (this is distinct from the total number of rows in - the current page: ``iterator.page.num_items``). - :raises: ValueError if the query has not yet been executed. - """ - if self.name is None: - raise ValueError("Query not yet executed: call 'run()'") - - client = self._require_client(client) - params = {} - - if start_index is not None: - params['startIndex'] = start_index - - if timeout_ms is not None: - params['timeoutMs'] = timeout_ms - - if max_results is not None: - params['maxResults'] = max_results + job_id_present = ( + 'jobReference' in api_response + and 'jobId' in api_response['jobReference'] + and 'projectId' in api_response['jobReference']) + if not job_id_present: + raise ValueError('QueryResult requires a job reference') - path = '/projects/%s/queries/%s' % (self.project, self.name) - iterator = page_iterator.HTTPIterator( - client=client, - api_request=client._connection.api_request, - path=path, - item_to_value=_item_to_row, - items_key='rows', - page_token=page_token, - page_start=_rows_page_start_query, - next_token='pageToken', - extra_params=params) - iterator.query_result = self - return iterator - - -def _rows_page_start_query(iterator, page, response): - """Update query response when :class:`~google.cloud.iterator.Page` starts. - - .. note:: - - This assumes that the ``query_response`` attribute has been - added to the iterator after being created, which - should be done by the caller. - - :type iterator: :class:`~google.api_core.page_iterator.Iterator` - :param iterator: The iterator that is currently in use. - - :type page: :class:`~google.api_core.page_iterator.Page` - :param page: The page that was just created. 
- - :type response: dict - :param response: The JSON API response for a page of rows in a table. - """ - iterator.query_result._set_properties(response) - iterator.schema = iterator.query_result.schema - _rows_page_start(iterator, page, response) + self._properties.clear() + self._properties.update(copy.deepcopy(api_response)) + + +def _query_param_from_api_repr(resource): + """Helper: construct concrete query parameter from JSON resource.""" + qp_type = resource['parameterType'] + if 'arrayType' in qp_type: + klass = ArrayQueryParameter + elif 'structTypes' in qp_type: + klass = StructQueryParameter + else: + klass = ScalarQueryParameter + return klass.from_api_repr(resource) diff --git a/bigquery/google/cloud/bigquery/schema.py b/bigquery/google/cloud/bigquery/schema.py index 4aea34ac22e0..a9dc7b2eac1f 100644 --- a/bigquery/google/cloud/bigquery/schema.py +++ b/bigquery/google/cloud/bigquery/schema.py @@ -32,7 +32,7 @@ class SchemaField(object): :type description: str :param description: optional description for the field. - :type fields: tuple of :class:`SchemaField` + :type fields: tuple of :class:`~google.cloud.bigquery.SchemaField` :param fields: subfields (requires ``field_type`` of 'RECORD'). """ def __init__(self, name, field_type, mode='NULLABLE', @@ -126,7 +126,7 @@ def to_api_repr(self): return answer def _key(self): - """A tuple key that unique-ly describes this field. + """A tuple key that uniquely describes this field. Used to compute this instance's hashcode and evaluate equality. @@ -154,3 +154,50 @@ def __hash__(self): def __repr__(self): return 'SchemaField{}'.format(self._key()) + + +def _parse_schema_resource(info): + """Parse a resource fragment into a schema field. + + :type info: mapping + :param info: should contain a "fields" key to be parsed + + :rtype: list of :class:`SchemaField`, or ``NoneType`` + :returns: a list of parsed fields, or ``None`` if no "fields" key is + present in ``info``. + """ + if 'fields' not in info: + return () + + schema = [] + for r_field in info['fields']: + name = r_field['name'] + field_type = r_field['type'] + mode = r_field.get('mode', 'NULLABLE') + description = r_field.get('description') + sub_fields = _parse_schema_resource(r_field) + schema.append( + SchemaField(name, field_type, mode, description, sub_fields)) + return schema + + +def _build_schema_resource(fields): + """Generate a resource fragment for a schema. + + :type fields: sequence of :class:`SchemaField` + :param fields: schema to be dumped + + :rtype: mapping + :returns: a mapping describing the schema of the supplied fields. 
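The helpers above build and parse nested schemas recursively; a short sketch of the kind of schema they handle, using illustrative field names:

.. code:: python

    from google.cloud.bigquery import SchemaField

    address = SchemaField(
        'address', 'RECORD', mode='NULLABLE',
        fields=(
            SchemaField('city', 'STRING'),
            SchemaField('zip_code', 'STRING'),
        ))
    schema = [
        SchemaField('full_name', 'STRING', mode='REQUIRED'),
        SchemaField('age', 'INTEGER', mode='REQUIRED'),
        address,
    ]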
+ """ + infos = [] + for field in fields: + info = {'name': field.name, + 'type': field.field_type, + 'mode': field.mode} + if field.description is not None: + info['description'] = field.description + if field.fields: + info['fields'] = _build_schema_resource(field.fields) + infos.append(info) + return infos diff --git a/bigquery/google/cloud/bigquery/table.py b/bigquery/google/cloud/bigquery/table.py index 20075bc50bd5..238832ea23ba 100644 --- a/bigquery/google/cloud/bigquery/table.py +++ b/bigquery/google/cloud/bigquery/table.py @@ -14,38 +14,136 @@ """Define API Datasets.""" +from __future__ import absolute_import + import datetime -import os import six -from google import resumable_media -from google.resumable_media.requests import MultipartUpload -from google.resumable_media.requests import ResumableUpload - -from google.api_core import page_iterator -from google.cloud import exceptions from google.cloud._helpers import _datetime_from_microseconds from google.cloud._helpers import _millis_from_datetime +from google.cloud.bigquery._helpers import _snake_to_camel_case from google.cloud.bigquery.schema import SchemaField -from google.cloud.bigquery._helpers import _item_to_row -from google.cloud.bigquery._helpers import _rows_page_start -from google.cloud.bigquery._helpers import _SCALAR_VALUE_TO_JSON_ROW +from google.cloud.bigquery.schema import _build_schema_resource +from google.cloud.bigquery.schema import _parse_schema_resource +from google.cloud.bigquery.external_config import ExternalConfig -_TABLE_HAS_NO_SCHEMA = "Table has no schema: call 'table.reload()'" +_TABLE_HAS_NO_SCHEMA = "Table has no schema: call 'client.get_table()'" _MARKER = object() -_DEFAULT_CHUNKSIZE = 1048576 # 1024 * 1024 B = 1 MB -_BASE_UPLOAD_TEMPLATE = ( - u'https://www.googleapis.com/upload/bigquery/v2/projects/' - u'{project}/jobs?uploadType=') -_MULTIPART_URL_TEMPLATE = _BASE_UPLOAD_TEMPLATE + u'multipart' -_RESUMABLE_URL_TEMPLATE = _BASE_UPLOAD_TEMPLATE + u'resumable' -_GENERIC_CONTENT_TYPE = u'*/*' -_READ_LESS_THAN_SIZE = ( - 'Size {:d} was specified but the file-like object only had ' - '{:d} bytes remaining.') -_DEFAULT_NUM_RETRIES = 6 + + +class TableReference(object): + """TableReferences are pointers to tables. + + See + https://cloud.google.com/bigquery/docs/reference/rest/v2/tables + + :type dataset_ref: :class:`google.cloud.bigquery.DatasetReference` + :param dataset_ref: a pointer to the dataset + + :type table_id: str + :param table_id: the ID of the table + """ + + def __init__(self, dataset_ref, table_id): + self._project = dataset_ref.project + self._dataset_id = dataset_ref.dataset_id + self._table_id = table_id + + @property + def project(self): + """Project bound to the table. + + :rtype: str + :returns: the project (derived from the dataset reference). + """ + return self._project + + @property + def dataset_id(self): + """ID of dataset containing the table. + + :rtype: str + :returns: the ID (derived from the dataset reference). + """ + return self._dataset_id + + @property + def table_id(self): + """Table ID. + + :rtype: str + :returns: the table ID. + """ + return self._table_id + + @property + def path(self): + """URL path for the table's APIs. + + :rtype: str + :returns: the path based on project, dataset and table IDs. 
+ """ + return '/projects/%s/datasets/%s/tables/%s' % ( + self._project, self._dataset_id, self._table_id) + + @classmethod + def from_api_repr(cls, resource): + """Factory: construct a table reference given its API representation + + :type resource: dict + :param resource: table reference representation returned from the API + + :rtype: :class:`google.cloud.bigquery.TableReference` + :returns: Table reference parsed from ``resource``. + """ + from google.cloud.bigquery.dataset import DatasetReference + + project = resource['projectId'] + dataset_id = resource['datasetId'] + table_id = resource['tableId'] + return cls(DatasetReference(project, dataset_id), table_id) + + def to_api_repr(self): + """Construct the API resource representation of this table reference. + + :rtype: dict + :returns: Table reference as represented as an API resource + """ + return { + 'projectId': self._project, + 'datasetId': self._dataset_id, + 'tableId': self._table_id, + } + + def _key(self): + """A tuple key that uniquely describes this field. + + Used to compute this instance's hashcode and evaluate equality. + + Returns: + tuple: The contents of this :class:`DatasetReference`. + """ + return ( + self._project, + self._dataset_id, + self._table_id, + ) + + def __eq__(self, other): + if not isinstance(other, TableReference): + return NotImplemented + return self._key() == other._key() + + def __ne__(self, other): + return not self == other + + def __hash__(self): + return hash(self._key()) + + def __repr__(self): + return 'TableReference{}'.format(self._key()) class Table(object): @@ -54,22 +152,27 @@ class Table(object): See https://cloud.google.com/bigquery/docs/reference/rest/v2/tables - :type name: str - :param name: the name of the table + :type table_ref: :class:`google.cloud.bigquery.TableReference` + :param table_ref: a pointer to a table - :type dataset: :class:`google.cloud.bigquery.dataset.Dataset` - :param dataset: The dataset which contains the table. - - :type schema: list of :class:`SchemaField` + :type schema: list of :class:`~google.cloud.bigquery.SchemaField` :param schema: The table's schema """ _schema = None - def __init__(self, name, dataset, schema=()): - self.name = name - self._dataset = dataset - self._properties = {} + all_fields = [ + 'description', 'friendly_name', 'expires', 'location', + 'partitioning_type', 'view_use_legacy_sql', 'view_query', 'schema', + 'external_data_configuration', 'labels', + ] + + def __init__(self, table_ref, schema=()): + self._project = table_ref.project + self._table_id = table_ref.table_id + self._dataset_id = table_ref.dataset_id + self._external_config = None + self._properties = {'labels': {}} # Let the @property do validation. self.schema = schema @@ -80,31 +183,41 @@ def project(self): :rtype: str :returns: the project (derived from the dataset). """ - return self._dataset.project + return self._project @property - def dataset_name(self): - """Name of dataset containing the table. + def dataset_id(self): + """ID of dataset containing the table. :rtype: str :returns: the ID (derived from the dataset). """ - return self._dataset.name + return self._dataset_id + + @property + def table_id(self): + """ID of the table. + + :rtype: str + :returns: the table ID. + """ + return self._table_id @property def path(self): """URL path for the table's APIs. :rtype: str - :returns: the path based on project and dataste name. + :returns: the path based on project, dataset and table IDs. 
""" - return '%s/tables/%s' % (self._dataset.path, self.name) + return '/projects/%s/datasets/%s/tables/%s' % ( + self._project, self._dataset_id, self._table_id) @property def schema(self): """Table's schema. - :rtype: list of :class:`SchemaField` + :rtype: list of :class:`~google.cloud.bigquery.SchemaField` :returns: fields describing the schema """ return list(self._schema) @@ -113,15 +226,44 @@ def schema(self): def schema(self, value): """Update table's schema - :type value: list of :class:`SchemaField` + :type value: list of :class:`~google.cloud.bigquery.SchemaField` :param value: fields describing the schema :raises: TypeError if 'value' is not a sequence, or ValueError if any item in the sequence is not a SchemaField """ - if not all(isinstance(field, SchemaField) for field in value): + if value is None: + self._schema = () + elif not all(isinstance(field, SchemaField) for field in value): raise ValueError('Schema items must be fields') - self._schema = tuple(value) + else: + self._schema = tuple(value) + + @property + def labels(self): + """Labels for the table. + + This method always returns a dict. To change a table's labels, + modify the dict, then call ``Client.update_table``. To delete a + label, set its value to ``None`` before updating. + + :rtype: dict, {str -> str} + :returns: A dict of the the table's labels. + """ + return self._properties['labels'] + + @labels.setter + def labels(self, value): + """Update labels for the table. + + :type value: dict, {str -> str} + :param value: new labels + + :raises: ValueError for invalid value types. + """ + if not isinstance(value, dict): + raise ValueError("Pass a dict") + self._properties['labels'] = value @property def created(self): @@ -188,11 +330,11 @@ def self_link(self): return self._properties.get('selfLink') @property - def table_id(self): - """ID for the table resource. + def full_table_id(self): + """ID for the table, in the form ``project_id:dataset_id:table_id``. :rtype: str, or ``NoneType`` - :returns: the ID (None until set from the server). + :returns: the full ID (None until set from the server). """ return self._properties.get('id') @@ -244,7 +386,7 @@ def partition_expiration(self, value): """Update the experation time in ms for a partition :type value: int - :param value: partition experiation time in ms + :param value: partition experiation time in milliseconds """ if not isinstance(value, (int, type(None))): raise ValueError( @@ -353,6 +495,9 @@ def location(self, value): def view_query(self): """SQL query defining the table as a view. + By default, the query is treated as Standard SQL. To use Legacy + SQL, set view_use_legacy_sql to True. + :rtype: str, or ``NoneType`` :returns: The query as set by the user, or None (the default). """ @@ -371,9 +516,14 @@ def view_query(self, value): """ if not isinstance(value, six.string_types): raise ValueError("Pass a string") - if self._properties.get('view') is None: - self._properties['view'] = {} - self._properties['view']['query'] = value + view = self._properties.get('view') + if view is None: + view = self._properties['view'] = {} + view['query'] = value + # The service defaults useLegacySql to True, but this + # client uses Standard SQL by default. + if view.get('useLegacySql') is None: + view['useLegacySql'] = False @view_query.deleter def view_query(self): @@ -382,26 +532,29 @@ def view_query(self): @property def view_use_legacy_sql(self): - """Specifies whether to execute the view with legacy or standard SQL. 
+ """Specifies whether to execute the view with Legacy or Standard SQL. - If not set, None is returned. BigQuery's default mode is equivalent to - useLegacySql = True. + The default is False for views (use Standard SQL). + If this table is not a view, None is returned. - :rtype: bool, or ``NoneType`` - :returns: The boolean for view.useLegacySql as set by the user, or - None (the default). + :rtype: bool or ``NoneType`` + :returns: The boolean for view.useLegacySql, or None if not a view. """ view = self._properties.get('view') if view is not None: - return view.get('useLegacySql') + # useLegacySql is never missing from the view dict if this table + # was created client-side, because the view_query setter populates + # it. So a missing or None can only come from the server, whose + # default is True. + return view.get('useLegacySql', True) @view_use_legacy_sql.setter def view_use_legacy_sql(self, value): """Update the view sub-property 'useLegacySql'. - This boolean specifies whether to execute the view with legacy SQL - (True) or standard SQL (False). The default, if not specified, is - 'True'. + This boolean specifies whether to execute the view with Legacy SQL + (True) or Standard SQL (False). The default, if not specified, is + 'False'. :type value: bool :param value: The boolean for view.useLegacySql @@ -414,60 +567,68 @@ def view_use_legacy_sql(self, value): self._properties['view'] = {} self._properties['view']['useLegacySql'] = value - def list_partitions(self, client=None): - """List the partitions in a table. + @property + def streaming_buffer(self): + """Information about a table's streaming buffer. + + :rtype: :class:`~google.cloud.bigquery.StreamingBuffer` + :returns: Streaming buffer information, returned from get_table. + """ + sb = self._properties.get('streamingBuffer') + if sb is not None: + return StreamingBuffer(sb) - :type client: :class:`~google.cloud.bigquery.client.Client` or - ``NoneType`` - :param client: the client to use. If not passed, falls back to the - ``client`` stored on the current dataset. + @property + def external_data_configuration(self): + """Configuration for an external data source. - :rtype: list - :returns: a list of time partitions + If not set, None is returned. + + :rtype: :class:`~google.cloud.bigquery.ExternalConfig`, or ``NoneType`` + :returns: The external configuration, or None (the default). + """ + return self._external_config + + @external_data_configuration.setter + def external_data_configuration(self, value): + """Sets the configuration for an external data source. + + :type value: + :class:`~google.cloud.bigquery.ExternalConfig`, or ``NoneType`` + :param value: The ExternalConfig, or None to unset. """ - query = self._require_client(client).run_sync_query( - 'SELECT partition_id from [%s.%s$__PARTITIONS_SUMMARY__]' % - (self.dataset_name, self.name)) - query.run() - return [row[0] for row in query.rows] + if not (value is None or isinstance(value, ExternalConfig)): + raise ValueError("Pass an ExternalConfig or None") + self._external_config = value @classmethod - def from_api_repr(cls, resource, dataset): + def from_api_repr(cls, resource): """Factory: construct a table given its API representation :type resource: dict :param resource: table resource representation returned from the API - :type dataset: :class:`google.cloud.bigquery.dataset.Dataset` + :type dataset: :class:`google.cloud.bigquery.Dataset` :param dataset: The dataset containing the table. 
- :rtype: :class:`google.cloud.bigquery.table.Table` + :rtype: :class:`google.cloud.bigquery.Table` :returns: Table parsed from ``resource``. """ + from google.cloud.bigquery import dataset + if ('tableReference' not in resource or 'tableId' not in resource['tableReference']): raise KeyError('Resource lacks required identity information:' '["tableReference"]["tableId"]') - table_name = resource['tableReference']['tableId'] - table = cls(table_name, dataset=dataset) + project_id = resource['tableReference']['projectId'] + table_id = resource['tableReference']['tableId'] + dataset_id = resource['tableReference']['datasetId'] + dataset_ref = dataset.DatasetReference(project_id, dataset_id) + + table = cls(dataset_ref.table(table_id)) table._set_properties(resource) return table - def _require_client(self, client): - """Check client or verify over-ride. - - :type client: :class:`~google.cloud.bigquery.client.Client` or - ``NoneType`` - :param client: the client to use. If not passed, falls back to the - ``client`` stored on the current dataset. - - :rtype: :class:`google.cloud.bigquery.client.Client` - :returns: The client passed in or the currently bound client. - """ - if client is None: - client = self._dataset._client - return client - def _set_properties(self, api_response): """Update properties from resource in body of ``api_response`` @@ -478,872 +639,123 @@ def _set_properties(self, api_response): cleaned = api_response.copy() schema = cleaned.pop('schema', {'fields': ()}) self.schema = _parse_schema_resource(schema) + ec = cleaned.pop('externalDataConfiguration', None) + if ec: + self.external_data_configuration = ExternalConfig.from_api_repr(ec) if 'creationTime' in cleaned: cleaned['creationTime'] = float(cleaned['creationTime']) if 'lastModifiedTime' in cleaned: cleaned['lastModifiedTime'] = float(cleaned['lastModifiedTime']) if 'expirationTime' in cleaned: cleaned['expirationTime'] = float(cleaned['expirationTime']) + if 'labels' not in cleaned: + cleaned['labels'] = {} self._properties.update(cleaned) - def _build_resource(self): - """Generate a resource for ``create`` or ``update``.""" - resource = { - 'tableReference': { - 'projectId': self._dataset.project, - 'datasetId': self._dataset.name, - 'tableId': self.name}, - } - if self.description is not None: - resource['description'] = self.description - - if self.expires is not None: - value = _millis_from_datetime(self.expires) - resource['expirationTime'] = value + def _populate_expires_resource(self, resource): + resource['expirationTime'] = _millis_from_datetime(self.expires) - if self.friendly_name is not None: - resource['friendlyName'] = self.friendly_name + def _populate_partitioning_type_resource(self, resource): + resource['timePartitioning'] = self._properties.get('timePartitioning') - if self.location is not None: - resource['location'] = self.location + def _populate_view_use_legacy_sql_resource(self, resource): + if 'view' not in resource: + resource['view'] = {} + resource['view']['useLegacySql'] = self.view_use_legacy_sql - if self.partitioning_type is not None: - resource['timePartitioning'] = self._properties['timePartitioning'] + def _populate_view_query_resource(self, resource): + if self.view_query is None: + resource['view'] = None + return + if 'view' not in resource: + resource['view'] = {} + resource['view']['query'] = self.view_query - if self.view_query is not None: - view = resource['view'] = {} - view['query'] = self.view_query - if self.view_use_legacy_sql is not None: - view['useLegacySql'] = 
self.view_use_legacy_sql - - if self._schema: + def _populate_schema_resource(self, resource): + if not self._schema: + resource['schema'] = None + else: resource['schema'] = { - 'fields': _build_schema_resource(self._schema) + 'fields': _build_schema_resource(self._schema), } - return resource - - def create(self, client=None): - """API call: create the table via a PUT request - - See - https://cloud.google.com/bigquery/docs/reference/rest/v2/tables/insert - - :type client: :class:`~google.cloud.bigquery.client.Client` or - ``NoneType`` - :param client: the client to use. If not passed, falls back to the - ``client`` stored on the current dataset. - """ - client = self._require_client(client) - path = '/projects/%s/datasets/%s/tables' % ( - self._dataset.project, self._dataset.name) - api_response = client._connection.api_request( - method='POST', path=path, data=self._build_resource()) - self._set_properties(api_response) - - def exists(self, client=None): - """API call: test for the existence of the table via a GET request - - See - https://cloud.google.com/bigquery/docs/reference/rest/v2/tables/get - - :type client: :class:`~google.cloud.bigquery.client.Client` or - ``NoneType`` - :param client: the client to use. If not passed, falls back to the - ``client`` stored on the current dataset. - - :rtype: bool - :returns: Boolean indicating existence of the table. - """ - client = self._require_client(client) - - try: - client._connection.api_request(method='GET', path=self.path, - query_params={'fields': 'id'}) - except exceptions.NotFound: - return False + def _populate_external_config(self, resource): + if not self.external_data_configuration: + resource['externalDataConfiguration'] = None else: - return True - - def reload(self, client=None): - """API call: refresh table properties via a GET request - - See - https://cloud.google.com/bigquery/docs/reference/rest/v2/tables/get - - :type client: :class:`~google.cloud.bigquery.client.Client` or - ``NoneType`` - :param client: the client to use. If not passed, falls back to the - ``client`` stored on the current dataset. - """ - client = self._require_client(client) - - api_response = client._connection.api_request( - method='GET', path=self.path) - self._set_properties(api_response) - - def patch(self, - client=None, - friendly_name=_MARKER, - description=_MARKER, - location=_MARKER, - expires=_MARKER, - view_query=_MARKER, - schema=_MARKER): - """API call: update individual table properties via a PATCH request - - See - https://cloud.google.com/bigquery/docs/reference/rest/v2/tables/patch - - :type client: :class:`~google.cloud.bigquery.client.Client` or - ``NoneType`` - :param client: the client to use. If not passed, falls back to the - ``client`` stored on the current dataset. - - :type friendly_name: str - :param friendly_name: (Optional) a descriptive name for this table. - - :type description: str - :param description: (Optional) a description of this table. - - :type location: str - :param location: - (Optional) the geographic location where the table resides. - - :type expires: :class:`datetime.datetime` - :param expires: (Optional) point in time at which the table expires. - - :type view_query: str - :param view_query: SQL query defining the table as a view - - :type schema: list of :class:`SchemaField` - :param schema: fields describing the schema - - :raises: ValueError for invalid value types. 
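# Minimal sketch of the Table.from_api_repr() factory shown above, assuming
# `resource` came from the tables API: the classmethod now takes only the
# resource and derives the parent dataset from ``tableReference`` (a resource
# missing that key raises KeyError). The project, dataset, and table names
# are hypothetical.
from google.cloud.bigquery import Table

resource = {
    'tableReference': {
        'projectId': 'my-project',
        'datasetId': 'my_dataset',
        'tableId': 'my_table',
    },
}
table = Table.from_api_repr(resource)
assert table.table_id == 'my_table'
assert table.full_table_id is None  # only populated from a server response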
- """ - client = self._require_client(client) - - partial = {} - - if expires is not _MARKER: - if (not isinstance(expires, datetime.datetime) and - expires is not None): - raise ValueError("Pass a datetime, or None") - partial['expirationTime'] = _millis_from_datetime(expires) - - if description is not _MARKER: - partial['description'] = description - - if friendly_name is not _MARKER: - partial['friendlyName'] = friendly_name - - if location is not _MARKER: - partial['location'] = location - - if view_query is not _MARKER: - if view_query is None: - partial['view'] = None - else: - partial['view'] = {'query': view_query} - - if schema is not _MARKER: - if schema is None: - partial['schema'] = None - else: - partial['schema'] = { - 'fields': _build_schema_resource(schema)} - - api_response = client._connection.api_request( - method='PATCH', path=self.path, data=partial) - self._set_properties(api_response) - - def update(self, client=None): - """API call: update table properties via a PUT request - - See - https://cloud.google.com/bigquery/docs/reference/rest/v2/tables/update - - :type client: :class:`~google.cloud.bigquery.client.Client` or - ``NoneType`` - :param client: the client to use. If not passed, falls back to the - ``client`` stored on the current dataset. - """ - client = self._require_client(client) - api_response = client._connection.api_request( - method='PUT', path=self.path, data=self._build_resource()) - self._set_properties(api_response) - - def delete(self, client=None): - """API call: delete the table via a DELETE request - - See - https://cloud.google.com/bigquery/docs/reference/rest/v2/tables/delete - - :type client: :class:`~google.cloud.bigquery.client.Client` or - ``NoneType`` - :param client: the client to use. If not passed, falls back to the - ``client`` stored on the current dataset. - """ - client = self._require_client(client) - client._connection.api_request(method='DELETE', path=self.path) - - def fetch_data(self, max_results=None, page_token=None, client=None): - """API call: fetch the table data via a GET request - - See - https://cloud.google.com/bigquery/docs/reference/rest/v2/tabledata/list - - .. note:: - - This method assumes that its instance's ``schema`` attribute is - up-to-date with the schema as defined on the back-end: if the - two schemas are not identical, the values returned may be - incomplete. To ensure that the local copy of the schema is - up-to-date, call :meth:`reload`. - - :type max_results: int - :param max_results: (Optional) Maximum number of rows to return. - - :type page_token: str - :param page_token: (Optional) Token representing a cursor into the - table's rows. - - :type client: :class:`~google.cloud.bigquery.client.Client` - :param client: (Optional) The client to use. If not passed, falls - back to the ``client`` stored on the current dataset. + resource['externalDataConfiguration'] = ExternalConfig.to_api_repr( + self.external_data_configuration) + + custom_resource_fields = { + 'expires': _populate_expires_resource, + 'partitioning_type': _populate_partitioning_type_resource, + 'view_query': _populate_view_query_resource, + 'view_use_legacy_sql': _populate_view_use_legacy_sql_resource, + 'schema': _populate_schema_resource, + 'external_data_configuration': _populate_external_config, + } - :rtype: :class:`~google.api_core.page_iterator.Iterator` - :returns: Iterator of row data :class:`tuple`s. 
During each page, the - iterator will have the ``total_rows`` attribute set, - which counts the total number of rows **in the table** - (this is distinct from the total number of rows in the - current page: ``iterator.page.num_items``). - """ - if len(self._schema) == 0: - raise ValueError(_TABLE_HAS_NO_SCHEMA) - - params = {} - - if max_results is not None: - params['maxResults'] = max_results - - client = self._require_client(client) - path = '%s/data' % (self.path,) - iterator = page_iterator.HTTPIterator( - client=client, - api_request=client._connection.api_request, - path=path, - item_to_value=_item_to_row, - items_key='rows', - page_token=page_token, - page_start=_rows_page_start, - next_token='pageToken', - extra_params=params) - iterator.schema = self._schema - return iterator - - def row_from_mapping(self, mapping): - """Convert a mapping to a row tuple using the schema. - - :type mapping: dict - :param mapping: Mapping of row data: must contain keys for all - required fields in the schema. Keys which do not correspond - to a field in the schema are ignored. - - :rtype: tuple - :returns: Tuple whose elements are ordered according to the table's - schema. - :raises: ValueError if table's schema is not set - """ - if len(self._schema) == 0: - raise ValueError(_TABLE_HAS_NO_SCHEMA) - - row = [] - for field in self.schema: - if field.mode == 'REQUIRED': - row.append(mapping[field.name]) - elif field.mode == 'REPEATED': - row.append(mapping.get(field.name, ())) - elif field.mode == 'NULLABLE': - row.append(mapping.get(field.name)) + def _build_resource(self, filter_fields): + """Generate a resource for ``create`` or ``update``.""" + resource = { + 'tableReference': { + 'projectId': self._project, + 'datasetId': self._dataset_id, + 'tableId': self.table_id}, + } + for f in filter_fields: + if f in self.custom_resource_fields: + self.custom_resource_fields[f](self, resource) else: - raise ValueError( - "Unknown field mode: {}".format(field.mode)) - return tuple(row) - - def insert_data(self, - rows, - row_ids=None, - skip_invalid_rows=None, - ignore_unknown_values=None, - template_suffix=None, - client=None): - """API call: insert table data via a POST request - - See - https://cloud.google.com/bigquery/docs/reference/rest/v2/tabledata/insertAll - - :type rows: list of tuples - :param rows: Row data to be inserted. Each tuple should contain data - for each schema field on the current table and in the - same order as the schema fields. - - :type row_ids: list of string - :param row_ids: Unique ids, one per row being inserted. If not - passed, no de-duplication occurs. - - :type skip_invalid_rows: bool - :param skip_invalid_rows: (Optional) Insert all valid rows of a - request, even if invalid rows exist. - The default value is False, which causes - the entire request to fail if any invalid - rows exist. - - :type ignore_unknown_values: bool - :param ignore_unknown_values: (Optional) Accept rows that contain - values that do not match the schema. - The unknown values are ignored. Default - is False, which treats unknown values as - errors. - - :type template_suffix: str - :param template_suffix: - (Optional) treat ``name`` as a template table and provide a suffix. - BigQuery will create the table `` + `` based - on the schema of the template table. See - https://cloud.google.com/bigquery/streaming-data-into-bigquery#template-tables - - :type client: :class:`~google.cloud.bigquery.client.Client` or - ``NoneType`` - :param client: the client to use. 
If not passed, falls back to the - ``client`` stored on the current dataset. - - :rtype: list of mappings - :returns: One mapping per row with insert errors: the "index" key - identifies the row, and the "errors" key contains a list - of the mappings describing one or more problems with the - row. - :raises: ValueError if table's schema is not set - """ - if len(self._schema) == 0: - raise ValueError(_TABLE_HAS_NO_SCHEMA) - - client = self._require_client(client) - rows_info = [] - data = {'rows': rows_info} - - for index, row in enumerate(rows): - row_info = {} - - for field, value in zip(self._schema, row): - converter = _SCALAR_VALUE_TO_JSON_ROW.get(field.field_type) - if converter is not None: # STRING doesn't need converting - value = converter(value) - row_info[field.name] = value - - info = {'json': row_info} - if row_ids is not None: - info['insertId'] = row_ids[index] - - rows_info.append(info) - - if skip_invalid_rows is not None: - data['skipInvalidRows'] = skip_invalid_rows - - if ignore_unknown_values is not None: - data['ignoreUnknownValues'] = ignore_unknown_values - - if template_suffix is not None: - data['templateSuffix'] = template_suffix - - response = client._connection.api_request( - method='POST', - path='%s/insertAll' % self.path, - data=data) - errors = [] - - for error in response.get('insertErrors', ()): - errors.append({'index': int(error['index']), - 'errors': error['errors']}) - - return errors - - def _get_transport(self, client): - """Return the client's transport. - - :type client: :class:`~google.cloud.bigquery.client.Client` - :param client: The client to use. - - :rtype transport: - :class:`~google.auth.transport.requests.AuthorizedSession` - :returns: The transport (with credentials) that will - make authenticated requests. - """ - return client._http - - def _initiate_resumable_upload(self, client, stream, - metadata, num_retries): - """Initiate a resumable upload. - - :type client: :class:`~google.cloud.bigquery.client.Client` - :param client: The client to use. - - :type stream: IO[bytes] - :param stream: A bytes IO object open for reading. - - :type metadata: dict - :param metadata: The metadata associated with the upload. - - :type num_retries: int - :param num_retries: Number of upload retries. (Deprecated: This - argument will be removed in a future release.) - - :rtype: tuple - :returns: - Pair of - - * The :class:`~google.resumable_media.requests.ResumableUpload` - that was created - * The ``transport`` used to initiate the upload. - """ - chunk_size = _DEFAULT_CHUNKSIZE - transport = self._get_transport(client) - headers = _get_upload_headers(client._connection.USER_AGENT) - upload_url = _RESUMABLE_URL_TEMPLATE.format(project=self.project) - upload = ResumableUpload(upload_url, chunk_size, headers=headers) - - if num_retries is not None: - upload._retry_strategy = resumable_media.RetryStrategy( - max_retries=num_retries) - - upload.initiate( - transport, stream, metadata, _GENERIC_CONTENT_TYPE, - stream_final=False) - - return upload, transport - - def _do_resumable_upload(self, client, stream, metadata, num_retries): - """Perform a resumable upload. - - :type client: :class:`~google.cloud.bigquery.client.Client` - :param client: The client to use. - - :type stream: IO[bytes] - :param stream: A bytes IO object open for reading. - - :type metadata: dict - :param metadata: The metadata associated with the upload. - - :type num_retries: int - :param num_retries: Number of upload retries. 
(Deprecated: This - argument will be removed in a future release.) - - :rtype: :class:`~requests.Response` - :returns: The "200 OK" response object returned after the final chunk - is uploaded. - """ - upload, transport = self._initiate_resumable_upload( - client, stream, metadata, num_retries) - - while not upload.finished: - response = upload.transmit_next_chunk(transport) - - return response - - def _do_multipart_upload(self, client, stream, metadata, - size, num_retries): - """Perform a multipart upload. - - :type client: :class:`~google.cloud.bigquery.client.Client` - :param client: The client to use. - - :type stream: IO[bytes] - :param stream: A bytes IO object open for reading. - - :type metadata: dict - :param metadata: The metadata associated with the upload. - - :type size: int - :param size: The number of bytes to be uploaded (which will be read - from ``stream``). If not provided, the upload will be - concluded once ``stream`` is exhausted (or :data:`None`). - - :type num_retries: int - :param num_retries: Number of upload retries. (Deprecated: This - argument will be removed in a future release.) - - :rtype: :class:`~requests.Response` - :returns: The "200 OK" response object returned after the multipart - upload request. - :raises: :exc:`ValueError` if the ``stream`` has fewer than ``size`` - bytes remaining. - """ - data = stream.read(size) - if len(data) < size: - msg = _READ_LESS_THAN_SIZE.format(size, len(data)) - raise ValueError(msg) - - transport = self._get_transport(client) - headers = _get_upload_headers(client._connection.USER_AGENT) - - upload_url = _MULTIPART_URL_TEMPLATE.format(project=self.project) - upload = MultipartUpload(upload_url, headers=headers) - - if num_retries is not None: - upload._retry_strategy = resumable_media.RetryStrategy( - max_retries=num_retries) - - response = upload.transmit( - transport, data, metadata, _GENERIC_CONTENT_TYPE) - - return response - - def _do_upload(self, client, stream, metadata, size, num_retries): - """Determine an upload strategy and then perform the upload. - - If ``size`` is :data:`None`, then a resumable upload will be used, - otherwise the content and the metadata will be uploaded - in a single multipart upload request. - - :type client: :class:`~google.cloud.bigquery.client.Client` - :param client: The client to use. - - :type stream: IO[bytes] - :param stream: A bytes IO object open for reading. - - :type metadata: dict - :param metadata: The metadata associated with the upload. - - :type size: int - :param size: The number of bytes to be uploaded (which will be read - from ``stream``). If not provided, the upload will be - concluded once ``stream`` is exhausted (or :data:`None`). - - :type num_retries: int - :param num_retries: Number of upload retries. (Deprecated: This - argument will be removed in a future release.) - - :rtype: dict - :returns: The parsed JSON from the "200 OK" response. This will be the - **only** response in the multipart case and it will be the - **final** response in the resumable case. 
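# Sketch of the client-side replacements for the removed Table.insert_data()
# and Table.fetch_data() methods, following the pattern in the updated system
# tests below. Assumes `table` is an existing Table whose schema has
# 'full_name' (STRING) and 'age' (INTEGER) fields.
from google.cloud import bigquery

client = bigquery.Client()
rows_to_insert = [
    ('Phred Phlyntstone', 32),
    ('Wylma Phlyntstone', 29),
]
errors = client.create_rows(table, rows_to_insert)  # API request
assert errors == []

for row in client.list_rows(table):  # API requests as pages are fetched
    print(row.values())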
- """ - if size is None: - response = self._do_resumable_upload( - client, stream, metadata, num_retries) - else: - response = self._do_multipart_upload( - client, stream, metadata, size, num_retries) - - return response.json() - - # pylint: disable=too-many-arguments,too-many-locals - def upload_from_file(self, - file_obj, - source_format, - rewind=False, - size=None, - num_retries=_DEFAULT_NUM_RETRIES, - allow_jagged_rows=None, - allow_quoted_newlines=None, - create_disposition=None, - encoding=None, - field_delimiter=None, - ignore_unknown_values=None, - max_bad_records=None, - quote_character=None, - skip_leading_rows=None, - write_disposition=None, - client=None, - job_name=None, - null_marker=None): - """Upload the contents of this table from a file-like object. - - :type file_obj: file - :param file_obj: A file handle opened in binary mode for reading. - - :type source_format: str - :param source_format: Any supported format. The full list of supported - formats is documented under the - ``configuration.extract.destinationFormat`` property on this page: - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs - - :type rewind: bool - :param rewind: If True, seek to the beginning of the file handle before - writing the file. - - :type size: int - :param size: The number of bytes to read from the file handle. - If not provided, we'll try to guess the size using - :func:`os.fstat`. (If the file handle is not from the - filesystem this won't be possible.) - - :type num_retries: int - :param num_retries: Number of upload retries. Defaults to 6. - - :type allow_jagged_rows: bool - :param allow_jagged_rows: job configuration option; see - :meth:`google.cloud.bigquery.job.LoadJob`. - - :type allow_quoted_newlines: bool - :param allow_quoted_newlines: job configuration option; see - :meth:`google.cloud.bigquery.job.LoadJob`. - - :type create_disposition: str - :param create_disposition: job configuration option; see - :meth:`google.cloud.bigquery.job.LoadJob`. - - :type encoding: str - :param encoding: job configuration option; see - :meth:`google.cloud.bigquery.job.LoadJob`. - - :type field_delimiter: str - :param field_delimiter: job configuration option; see - :meth:`google.cloud.bigquery.job.LoadJob`. - - :type ignore_unknown_values: bool - :param ignore_unknown_values: job configuration option; see - :meth:`google.cloud.bigquery.job.LoadJob`. - - :type max_bad_records: int - :param max_bad_records: job configuration option; see - :meth:`google.cloud.bigquery.job.LoadJob`. - - :type quote_character: str - :param quote_character: job configuration option; see - :meth:`google.cloud.bigquery.job.LoadJob`. - - :type skip_leading_rows: int - :param skip_leading_rows: job configuration option; see - :meth:`google.cloud.bigquery.job.LoadJob`. - - :type write_disposition: str - :param write_disposition: job configuration option; see - :meth:`google.cloud.bigquery.job.LoadJob`. - - :type client: :class:`~google.cloud.bigquery.client.Client` - :param client: (Optional) The client to use. If not passed, falls back - to the ``client`` stored on the current table. - - :type job_name: str - :param job_name: Optional. The id of the job. Generated if not - explicitly passed in. - - :type null_marker: str - :param null_marker: Optional. A custom null marker (example: "\\N") - - :rtype: :class:`~google.cloud.bigquery.jobs.LoadTableFromStorageJob` - - :returns: the job instance used to load the data (e.g., for - querying status). Note that the job is already started: - do not call ``job.begin()``. 
- :raises: :class:`ValueError` if ``size`` is not passed in and can not - be determined, or if the ``file_obj`` can be detected to be - a file opened in text mode. - """ - client = self._require_client(client) - _maybe_rewind(file_obj, rewind=rewind) - _check_mode(file_obj) - metadata = _get_upload_metadata( - source_format, self._schema, self._dataset, self.name) - _configure_job_metadata(metadata, allow_jagged_rows, - allow_quoted_newlines, create_disposition, - encoding, field_delimiter, - ignore_unknown_values, max_bad_records, - quote_character, skip_leading_rows, - write_disposition, job_name, null_marker) - - try: - created_json = self._do_upload( - client, file_obj, metadata, size, num_retries) - return client.job_from_resource(created_json) - except resumable_media.InvalidResponse as exc: - raise exceptions.from_http_response(exc.response) - # pylint: enable=too-many-arguments,too-many-locals - - -def _configure_job_metadata(metadata, # pylint: disable=too-many-arguments - allow_jagged_rows, - allow_quoted_newlines, - create_disposition, - encoding, - field_delimiter, - ignore_unknown_values, - max_bad_records, - quote_character, - skip_leading_rows, - write_disposition, - job_name, - null_marker): - """Helper for :meth:`Table.upload_from_file`.""" - load_config = metadata['configuration']['load'] - - if allow_jagged_rows is not None: - load_config['allowJaggedRows'] = allow_jagged_rows - - if allow_quoted_newlines is not None: - load_config['allowQuotedNewlines'] = allow_quoted_newlines - - if create_disposition is not None: - load_config['createDisposition'] = create_disposition - - if encoding is not None: - load_config['encoding'] = encoding - - if field_delimiter is not None: - load_config['fieldDelimiter'] = field_delimiter - - if ignore_unknown_values is not None: - load_config['ignoreUnknownValues'] = ignore_unknown_values - - if max_bad_records is not None: - load_config['maxBadRecords'] = max_bad_records - - if quote_character is not None: - load_config['quote'] = quote_character - - if skip_leading_rows is not None: - load_config['skipLeadingRows'] = skip_leading_rows - - if write_disposition is not None: - load_config['writeDisposition'] = write_disposition - - if job_name is not None: - load_config['jobReference'] = {'jobId': job_name} - - if null_marker is not None: - load_config['nullMarker'] = null_marker - - -def _parse_schema_resource(info): - """Parse a resource fragment into a schema field. - - :type info: mapping - :param info: should contain a "fields" key to be parsed - - :rtype: list of :class:`SchemaField`, or ``NoneType`` - :returns: a list of parsed fields, or ``None`` if no "fields" key is - present in ``info``. - """ - if 'fields' not in info: - return () - - schema = [] - for r_field in info['fields']: - name = r_field['name'] - field_type = r_field['type'] - mode = r_field.get('mode', 'NULLABLE') - description = r_field.get('description') - sub_fields = _parse_schema_resource(r_field) - schema.append( - SchemaField(name, field_type, mode, description, sub_fields)) - return schema - - -def _build_schema_resource(fields): - """Generate a resource fragment for a schema. - - :type fields: sequence of :class:`SchemaField` - :param fields: schema to be dumped - - :rtype: mapping - :returns: a mapping describing the schema of the supplied fields. 
- """ - infos = [] - for field in fields: - info = {'name': field.name, - 'type': field.field_type, - 'mode': field.mode} - if field.description is not None: - info['description'] = field.description - if field.fields: - info['fields'] = _build_schema_resource(field.fields) - infos.append(info) - return infos -# pylint: enable=unused-argument - - -def _maybe_rewind(stream, rewind=False): - """Rewind the stream if desired. - - :type stream: IO[bytes] - :param stream: A bytes IO object open for reading. - - :type rewind: bool - :param rewind: Indicates if we should seek to the beginning of the stream. - """ - if rewind: - stream.seek(0, os.SEEK_SET) - - -def _check_mode(stream): - """Check that a stream was opened in read-binary mode. - - :type stream: IO[bytes] - :param stream: A bytes IO object open for reading. - - :raises: :exc:`ValueError` if the ``stream.mode`` is a valid attribute - and is not among ``rb``, ``r+b`` or ``rb+``. - """ - mode = getattr(stream, 'mode', None) + api_field = _snake_to_camel_case(f) + resource[api_field] = getattr(self, f) + return resource - if mode is not None and mode not in ('rb', 'r+b', 'rb+'): - raise ValueError( - "Cannot upload files opened in text mode: use " - "open(filename, mode='rb') or open(filename, mode='r+b')") +def _row_from_mapping(mapping, schema): + """Convert a mapping to a row tuple using the schema. -def _get_upload_headers(user_agent): - """Get the headers for an upload request. + :type mapping: dict + :param mapping: Mapping of row data: must contain keys for all + required fields in the schema. Keys which do not correspond + to a field in the schema are ignored. - :type user_agent: str - :param user_agent: The user-agent for requests. + :type schema: list of :class:`~google.cloud.bigquery.SchemaField` + :param schema: The schema of the table destination for the rows - :rtype: dict - :returns: The headers to be used for the request. + :rtype: tuple + :returns: Tuple whose elements are ordered according to the schema. + :raises: ValueError if schema is empty """ - return { - 'Accept': 'application/json', - 'Accept-Encoding': 'gzip, deflate', - 'User-Agent': user_agent, - 'content-type': 'application/json', - } - - -def _get_upload_metadata(source_format, schema, dataset, name): - """Get base metadata for creating a table. - - :type source_format: str - :param source_format: one of 'CSV' or 'NEWLINE_DELIMITED_JSON'. - job configuration option. + if len(schema) == 0: + raise ValueError(_TABLE_HAS_NO_SCHEMA) + + row = [] + for field in schema: + if field.mode == 'REQUIRED': + row.append(mapping[field.name]) + elif field.mode == 'REPEATED': + row.append(mapping.get(field.name, ())) + elif field.mode == 'NULLABLE': + row.append(mapping.get(field.name)) + else: + raise ValueError( + "Unknown field mode: {}".format(field.mode)) + return tuple(row) - :type schema: list - :param schema: List of :class:`SchemaField` associated with a table. - :type dataset: :class:`~google.cloud.bigquery.dataset.Dataset` - :param dataset: A dataset which contains a table. +class StreamingBuffer(object): + """Information about a table's streaming buffer. - :type name: str - :param name: The name of the table. + See https://cloud.google.com/bigquery/streaming-data-into-bigquery. - :rtype: dict - :returns: The metadata dictionary. 
+ :type resource: dict + :param resource: streaming buffer representation returned from the API """ - load_config = { - 'sourceFormat': source_format, - 'destinationTable': { - 'projectId': dataset.project, - 'datasetId': dataset.name, - 'tableId': name, - }, - } - if schema: - load_config['schema'] = { - 'fields': _build_schema_resource(schema), - } - return { - 'configuration': { - 'load': load_config, - }, - } + def __init__(self, resource): + self.estimated_bytes = int(resource['estimatedBytes']) + self.estimated_rows = int(resource['estimatedRows']) + # time is in milliseconds since the epoch. + self.oldest_entry_time = _datetime_from_microseconds( + 1000.0 * int(resource['oldestEntryTime'])) diff --git a/bigquery/tests/system.py b/bigquery/tests/system.py index 0d5b9918fef3..335beda29863 100644 --- a/bigquery/tests/system.py +++ b/bigquery/tests/system.py @@ -24,10 +24,13 @@ import six +from google.api_core.exceptions import PreconditionFailed from google.cloud import bigquery +from google.cloud.bigquery.dataset import Dataset, DatasetReference +from google.cloud.bigquery.table import Table from google.cloud._helpers import UTC from google.cloud.bigquery import dbapi -from google.cloud.exceptions import Forbidden +from google.cloud.exceptions import Forbidden, NotFound from test_utils.retry import RetryErrors from test_utils.retry import RetryInstanceState @@ -38,12 +41,25 @@ JOB_TIMEOUT = 120 # 2 minutes WHERE = os.path.abspath(os.path.dirname(__file__)) +# Common table data used for many tests. +ROWS = [ + ('Phred Phlyntstone', 32), + ('Bharney Rhubble', 33), + ('Wylma Phlyntstone', 29), + ('Bhettye Rhubble', 27), +] +HEADER_ROW = ('Full Name', 'Age') +SCHEMA = [ + bigquery.SchemaField('full_name', 'STRING', mode='REQUIRED'), + bigquery.SchemaField('age', 'INTEGER', mode='REQUIRED'), +] + def _has_rows(result): return len(result) > 0 -def _make_dataset_name(prefix): +def _make_dataset_id(prefix): return '%s%s' % (prefix, unique_resource_id()) @@ -90,7 +106,6 @@ def setUp(self): self.to_delete = [] def tearDown(self): - from google.cloud.bigquery.dataset import Dataset from google.cloud.storage import Bucket from google.cloud.exceptions import BadRequest from google.cloud.exceptions import Conflict @@ -105,68 +120,65 @@ def _still_in_use(bad_request): if isinstance(doomed, Bucket): retry_409(doomed.delete)(force=True) elif isinstance(doomed, Dataset): - retry_in_use(doomed.delete)() + retry_in_use(Config.CLIENT.delete_dataset)(doomed) + elif isinstance(doomed, Table): + retry_in_use(Config.CLIENT.delete_table)(doomed) else: doomed.delete() def test_create_dataset(self): - DATASET_NAME = _make_dataset_name('create_dataset') - dataset = Config.CLIENT.dataset(DATASET_NAME) - self.assertFalse(dataset.exists()) - - retry_403(dataset.create)() + DATASET_ID = _make_dataset_id('create_dataset') + dataset = self.temp_dataset(DATASET_ID) + + self.assertTrue(_dataset_exists(dataset)) + self.assertEqual(dataset.dataset_id, DATASET_ID) + self.assertEqual(dataset.project, Config.CLIENT.project) + + def test_get_dataset(self): + DATASET_ID = _make_dataset_id('get_dataset') + client = Config.CLIENT + dataset_arg = Dataset(client.dataset(DATASET_ID)) + dataset_arg.friendly_name = 'Friendly' + dataset_arg.description = 'Description' + dataset = retry_403(client.create_dataset)(dataset_arg) self.to_delete.append(dataset) + dataset_ref = client.dataset(DATASET_ID) - self.assertTrue(dataset.exists()) - self.assertEqual(dataset.name, DATASET_NAME) - - def test_reload_dataset(self): - DATASET_NAME = 
_make_dataset_name('reload_dataset') - dataset = Config.CLIENT.dataset(DATASET_NAME) - dataset.friendly_name = 'Friendly' - dataset.description = 'Description' - - retry_403(dataset.create)() - self.to_delete.append(dataset) + got = client.get_dataset(dataset_ref) - other = Config.CLIENT.dataset(DATASET_NAME) - other.reload() - self.assertEqual(other.friendly_name, 'Friendly') - self.assertEqual(other.description, 'Description') + self.assertEqual(got.friendly_name, 'Friendly') + self.assertEqual(got.description, 'Description') - def test_patch_dataset(self): - dataset = Config.CLIENT.dataset(_make_dataset_name('patch_dataset')) - self.assertFalse(dataset.exists()) - - retry_403(dataset.create)() - self.to_delete.append(dataset) - - self.assertTrue(dataset.exists()) + def test_update_dataset(self): + dataset = self.temp_dataset(_make_dataset_id('update_dataset')) + self.assertTrue(_dataset_exists(dataset)) self.assertIsNone(dataset.friendly_name) self.assertIsNone(dataset.description) - dataset.patch(friendly_name='Friendly', description='Description') - self.assertEqual(dataset.friendly_name, 'Friendly') - self.assertEqual(dataset.description, 'Description') - - def test_update_dataset(self): - dataset = Config.CLIENT.dataset(_make_dataset_name('update_dataset')) - self.assertFalse(dataset.exists()) + self.assertEquals(dataset.labels, {}) - retry_403(dataset.create)() - self.to_delete.append(dataset) - - self.assertTrue(dataset.exists()) - after = [grant for grant in dataset.access_grants - if grant.entity_id != 'projectWriters'] - dataset.access_grants = after - - retry_403(dataset.update)() + dataset.friendly_name = 'Friendly' + dataset.description = 'Description' + dataset.labels = {'priority': 'high', 'color': 'blue'} + ds2 = Config.CLIENT.update_dataset( + dataset, + ('friendly_name', 'description', 'labels')) + self.assertEqual(ds2.friendly_name, 'Friendly') + self.assertEqual(ds2.description, 'Description') + self.assertEqual(ds2.labels, {'priority': 'high', 'color': 'blue'}) + + ds2.labels = { + 'color': 'green', # change + 'shape': 'circle', # add + 'priority': None, # delete + } + ds3 = Config.CLIENT.update_dataset(ds2, ['labels']) + self.assertEqual(ds3.labels, {'color': 'green', 'shape': 'circle'}) - self.assertEqual(len(dataset.access_grants), len(after)) - for found, expected in zip(dataset.access_grants, after): - self.assertEqual(found.role, expected.role) - self.assertEqual(found.entity_type, expected.entity_type) - self.assertEqual(found.entity_id, expected.entity_id) + # If we try to update using d2 again, it will fail because the + # previous update changed the ETag. + ds2.description = 'no good' + with self.assertRaises(PreconditionFailed): + Config.CLIENT.update_dataset(ds2, ['description']) def test_list_datasets(self): datasets_to_create = [ @@ -174,48 +186,50 @@ def test_list_datasets(self): 'newer' + unique_resource_id(), 'newest' + unique_resource_id(), ] - for dataset_name in datasets_to_create: - created_dataset = Config.CLIENT.dataset(dataset_name) - retry_403(created_dataset.create)() - self.to_delete.append(created_dataset) + for dataset_id in datasets_to_create: + self.temp_dataset(dataset_id) # Retrieve the datasets. 
iterator = Config.CLIENT.list_datasets() all_datasets = list(iterator) self.assertIsNone(iterator.next_page_token) created = [dataset for dataset in all_datasets - if dataset.name in datasets_to_create and + if dataset.dataset_id in datasets_to_create and dataset.project == Config.CLIENT.project] self.assertEqual(len(created), len(datasets_to_create)) def test_create_table(self): - dataset = Config.CLIENT.dataset(_make_dataset_name('create_table')) - self.assertFalse(dataset.exists()) + dataset = self.temp_dataset(_make_dataset_id('create_table')) + table_id = 'test_table' + table_arg = Table(dataset.table(table_id), schema=SCHEMA) + self.assertFalse(_table_exists(table_arg)) - retry_403(dataset.create)() - self.to_delete.append(dataset) - - TABLE_NAME = 'test_table' - full_name = bigquery.SchemaField('full_name', 'STRING', - mode='REQUIRED') - age = bigquery.SchemaField('age', 'INTEGER', mode='REQUIRED') - table = dataset.table(TABLE_NAME, schema=[full_name, age]) - self.assertFalse(table.exists()) - table.create() + table = retry_403(Config.CLIENT.create_table)(table_arg) self.to_delete.insert(0, table) - self.assertTrue(table.exists()) - self.assertEqual(table.name, TABLE_NAME) - def test_list_tables(self): - DATASET_NAME = _make_dataset_name('list_tables') - dataset = Config.CLIENT.dataset(DATASET_NAME) - self.assertFalse(dataset.exists()) + self.assertTrue(_table_exists(table)) + self.assertEqual(table.table_id, table_id) - retry_403(dataset.create)() - self.to_delete.append(dataset) + def test_get_table_w_public_dataset(self): + PUBLIC = 'bigquery-public-data' + DATASET_ID = 'samples' + TABLE_ID = 'shakespeare' + table_ref = DatasetReference(PUBLIC, DATASET_ID).table(TABLE_ID) + + table = Config.CLIENT.get_table(table_ref) + + self.assertEqual(table.table_id, TABLE_ID) + self.assertEqual(table.dataset_id, DATASET_ID) + self.assertEqual(table.project, PUBLIC) + schema_names = [field.name for field in table.schema] + self.assertEqual( + schema_names, ['word', 'word_count', 'corpus', 'corpus_date']) + def test_list_dataset_tables(self): + DATASET_ID = _make_dataset_id('list_tables') + dataset = self.temp_dataset(DATASET_ID) # Retrieve tables before any are created for the dataset. - iterator = dataset.list_tables() + iterator = Config.CLIENT.list_dataset_tables(dataset) all_tables = list(iterator) self.assertEqual(all_tables, []) self.assertIsNone(iterator.next_page_token) @@ -226,79 +240,89 @@ def test_list_tables(self): 'newer' + unique_resource_id(), 'newest' + unique_resource_id(), ] - full_name = bigquery.SchemaField('full_name', 'STRING', - mode='REQUIRED') - age = bigquery.SchemaField('age', 'INTEGER', mode='REQUIRED') for table_name in tables_to_create: - created_table = dataset.table(table_name, schema=[full_name, age]) - created_table.create() + table = Table(dataset.table(table_name), schema=SCHEMA) + created_table = retry_403(Config.CLIENT.create_table)(table) self.to_delete.insert(0, created_table) # Retrieve the tables. 
- iterator = dataset.list_tables() + iterator = Config.CLIENT.list_dataset_tables(dataset) all_tables = list(iterator) self.assertIsNone(iterator.next_page_token) created = [table for table in all_tables - if (table.name in tables_to_create and - table.dataset_name == DATASET_NAME)] + if (table.table_id in tables_to_create and + table.dataset_id == DATASET_ID)] self.assertEqual(len(created), len(tables_to_create)) - def test_patch_table(self): - dataset = Config.CLIENT.dataset(_make_dataset_name('patch_table')) - self.assertFalse(dataset.exists()) - - retry_403(dataset.create)() - self.to_delete.append(dataset) + def test_update_table(self): + dataset = self.temp_dataset(_make_dataset_id('update_table')) TABLE_NAME = 'test_table' - full_name = bigquery.SchemaField('full_name', 'STRING', - mode='REQUIRED') - age = bigquery.SchemaField('age', 'INTEGER', mode='REQUIRED') - table = dataset.table(TABLE_NAME, schema=[full_name, age]) - self.assertFalse(table.exists()) - table.create() + table_arg = Table(dataset.table(TABLE_NAME), schema=SCHEMA) + self.assertFalse(_table_exists(table_arg)) + table = retry_403(Config.CLIENT.create_table)(table_arg) self.to_delete.insert(0, table) - self.assertTrue(table.exists()) + self.assertTrue(_table_exists(table)) self.assertIsNone(table.friendly_name) self.assertIsNone(table.description) - table.patch(friendly_name='Friendly', description='Description') - self.assertEqual(table.friendly_name, 'Friendly') - self.assertEqual(table.description, 'Description') + self.assertEquals(table.labels, {}) + table.friendly_name = 'Friendly' + table.description = 'Description' + table.labels = {'priority': 'high', 'color': 'blue'} + + table2 = Config.CLIENT.update_table( + table, ['friendly_name', 'description', 'labels']) + + self.assertEqual(table2.friendly_name, 'Friendly') + self.assertEqual(table2.description, 'Description') + self.assertEqual(table2.labels, {'priority': 'high', 'color': 'blue'}) + + table2.description = None + table2.labels = { + 'color': 'green', # change + 'shape': 'circle', # add + 'priority': None, # delete + } + table3 = Config.CLIENT.update_table(table2, ['description', 'labels']) + self.assertIsNone(table3.description) + self.assertEqual(table3.labels, {'color': 'green', 'shape': 'circle'}) - def test_update_table(self): - dataset = Config.CLIENT.dataset(_make_dataset_name('update_table')) - self.assertFalse(dataset.exists()) + # If we try to update using table2 again, it will fail because the + # previous update changed the ETag. 
+ table2.description = 'no good' + with self.assertRaises(PreconditionFailed): + Config.CLIENT.update_table(table2, ['description']) - retry_403(dataset.create)() - self.to_delete.append(dataset) + def test_update_table_schema(self): + dataset = self.temp_dataset(_make_dataset_id('update_table')) TABLE_NAME = 'test_table' - full_name = bigquery.SchemaField('full_name', 'STRING', - mode='REQUIRED') - age = bigquery.SchemaField('age', 'INTEGER', mode='REQUIRED') - table = dataset.table(TABLE_NAME, schema=[full_name, age]) - self.assertFalse(table.exists()) - table.create() + table_arg = Table(dataset.table(TABLE_NAME), schema=SCHEMA) + self.assertFalse(_table_exists(table_arg)) + table = retry_403(Config.CLIENT.create_table)(table_arg) self.to_delete.insert(0, table) - self.assertTrue(table.exists()) + self.assertTrue(_table_exists(table)) voter = bigquery.SchemaField('voter', 'BOOLEAN', mode='NULLABLE') schema = table.schema schema.append(voter) table.schema = schema - table.update() - self.assertEqual(len(table.schema), len(schema)) - for found, expected in zip(table.schema, schema): + + updated_table = Config.CLIENT.update_table(table, ['schema']) + + self.assertEqual(len(updated_table.schema), len(schema)) + for found, expected in zip(updated_table.schema, schema): self.assertEqual(found.name, expected.name) self.assertEqual(found.field_type, expected.field_type) self.assertEqual(found.mode, expected.mode) @staticmethod - def _fetch_single_page(table): - iterator = table.fetch_data() + def _fetch_single_page(table, selected_fields=None): + iterator = Config.CLIENT.list_rows( + table, selected_fields=selected_fields) page = six.next(iterator.pages) return list(page) - def test_insert_data_then_dump_table(self): + def test_create_rows_then_dump_table(self): NOW_SECONDS = 1448911495.484366 NOW = datetime.datetime.utcfromtimestamp( NOW_SECONDS).replace(tzinfo=UTC) @@ -309,25 +333,21 @@ def test_insert_data_then_dump_table(self): ('Bhettye Rhubble', 27, None), ] ROW_IDS = range(len(ROWS)) - dataset = Config.CLIENT.dataset( - _make_dataset_name('insert_data_then_dump')) - self.assertFalse(dataset.exists()) - - retry_403(dataset.create)() - self.to_delete.append(dataset) - TABLE_NAME = 'test_table' - full_name = bigquery.SchemaField('full_name', 'STRING', - mode='REQUIRED') - age = bigquery.SchemaField('age', 'INTEGER', mode='REQUIRED') - now = bigquery.SchemaField('now', 'TIMESTAMP') - table = dataset.table(TABLE_NAME, schema=[full_name, age, now]) - self.assertFalse(table.exists()) - table.create() + dataset = self.temp_dataset(_make_dataset_id('create_rows_then_dump')) + TABLE_ID = 'test_table' + schema = [ + bigquery.SchemaField('full_name', 'STRING', mode='REQUIRED'), + bigquery.SchemaField('age', 'INTEGER', mode='REQUIRED'), + bigquery.SchemaField('now', 'TIMESTAMP'), + ] + table_arg = Table(dataset.table(TABLE_ID), schema=schema) + self.assertFalse(_table_exists(table_arg)) + table = retry_403(Config.CLIENT.create_table)(table_arg) self.to_delete.insert(0, table) - self.assertTrue(table.exists()) + self.assertTrue(_table_exists(table)) - errors = table.insert_data(ROWS, ROW_IDS) + errors = Config.CLIENT.create_rows(table, ROWS, row_ids=ROW_IDS) self.assertEqual(len(errors), 0) rows = () @@ -337,49 +357,37 @@ def test_insert_data_then_dump_table(self): # 8 tries -> 1 + 2 + 4 + 8 + 16 + 32 + 64 = 127 seconds retry = RetryResult(_has_rows, max_tries=8) rows = retry(self._fetch_single_page)(table) - + row_tuples = [r.values() for r in rows] by_age = operator.itemgetter(1) - 
self.assertEqual(sorted(rows, key=by_age), + self.assertEqual(sorted(row_tuples, key=by_age), sorted(ROWS, key=by_age)) def test_load_table_from_local_file_then_dump_table(self): from google.cloud._testing import _NamedTemporaryFile - ROWS = [ - ('Phred Phlyntstone', 32), - ('Bharney Rhubble', 33), - ('Wylma Phlyntstone', 29), - ('Bhettye Rhubble', 27), - ] TABLE_NAME = 'test_table' - dataset = Config.CLIENT.dataset( - _make_dataset_name('load_local_then_dump')) - - retry_403(dataset.create)() - self.to_delete.append(dataset) - - full_name = bigquery.SchemaField('full_name', 'STRING', - mode='REQUIRED') - age = bigquery.SchemaField('age', 'INTEGER', mode='REQUIRED') - table = dataset.table(TABLE_NAME, schema=[full_name, age]) - table.create() + dataset = self.temp_dataset(_make_dataset_id('load_local_then_dump')) + table_ref = dataset.table(TABLE_NAME) + table_arg = Table(table_ref, schema=SCHEMA) + table = retry_403(Config.CLIENT.create_table)(table_arg) self.to_delete.insert(0, table) with _NamedTemporaryFile() as temp: with open(temp.name, 'w') as csv_write: writer = csv.writer(csv_write) - writer.writerow(('Full Name', 'Age')) + writer.writerow(HEADER_ROW) writer.writerows(ROWS) with open(temp.name, 'rb') as csv_read: - job = table.upload_from_file( - csv_read, - source_format='CSV', - skip_leading_rows=1, - create_disposition='CREATE_NEVER', - write_disposition='WRITE_EMPTY', - ) + config = bigquery.LoadJobConfig() + config.source_format = 'CSV' + config.skip_leading_rows = 1 + config.create_disposition = 'CREATE_NEVER' + config.write_disposition = 'WRITE_EMPTY' + config.schema = table.schema + job = Config.CLIENT.load_table_from_file( + csv_read, table_ref, job_config=config) # Retry until done. job.result(timeout=JOB_TIMEOUT) @@ -387,8 +395,9 @@ def test_load_table_from_local_file_then_dump_table(self): self.assertEqual(job.output_rows, len(ROWS)) rows = self._fetch_single_page(table) + row_tuples = [r.values() for r in rows] by_age = operator.itemgetter(1) - self.assertEqual(sorted(rows, key=by_age), + self.assertEqual(sorted(row_tuples, key=by_age), sorted(ROWS, key=by_age)) def test_load_table_from_local_avro_file_then_dump_table(self): @@ -402,91 +411,79 @@ def test_load_table_from_local_avro_file_then_dump_table(self): ("orange", 590), ("red", 650)] - dataset = Config.CLIENT.dataset( - _make_dataset_name('load_local_then_dump')) - - retry_403(dataset.create)() - self.to_delete.append(dataset) - - table = dataset.table(TABLE_NAME) + dataset = self.temp_dataset(_make_dataset_id('load_local_then_dump')) + table_ref = dataset.table(TABLE_NAME) + table = Table(table_ref) self.to_delete.insert(0, table) with open(os.path.join(WHERE, 'data', 'colors.avro'), 'rb') as avrof: - job = table.upload_from_file( - avrof, - source_format='AVRO', - write_disposition='WRITE_TRUNCATE' - ) - + config = bigquery.LoadJobConfig() + config.source_format = 'AVRO' + config.write_disposition = 'WRITE_TRUNCATE' + job = Config.CLIENT.load_table_from_file( + avrof, table_ref, job_config=config) # Retry until done. job.result(timeout=JOB_TIMEOUT) self.assertEqual(job.output_rows, len(ROWS)) - # Reload table to get the schema before fetching the rows. 
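# Sketch of row listing with the new client method, mirroring the
# _fetch_single_page() helper above: Client.list_rows() returns a page
# iterator, so a caller can read one page at a time or iterate every row.
# Assumes `table` is an existing Table fetched via Config.CLIENT.get_table().
import six

iterator = Config.CLIENT.list_rows(table)      # API request per page
first_page = list(six.next(iterator.pages))    # rows from the first page only

all_values = [row.values()
              for row in Config.CLIENT.list_rows(table)]  # each row as a tuple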
- table.reload() + table = Config.CLIENT.get_table(table) rows = self._fetch_single_page(table) + row_tuples = [r.values() for r in rows] by_wavelength = operator.itemgetter(1) - self.assertEqual(sorted(rows, key=by_wavelength), + self.assertEqual(sorted(row_tuples, key=by_wavelength), sorted(ROWS, key=by_wavelength)) - def test_load_table_from_storage_then_dump_table(self): - from google.cloud._testing import _NamedTemporaryFile - from google.cloud.storage import Client as StorageClient - - local_id = unique_resource_id() - BUCKET_NAME = 'bq_load_test' + local_id - BLOB_NAME = 'person_ages.csv' - GS_URL = 'gs://%s/%s' % (BUCKET_NAME, BLOB_NAME) - ROWS = [ - ('Phred Phlyntstone', 32), - ('Bharney Rhubble', 33), - ('Wylma Phlyntstone', 29), - ('Bhettye Rhubble', 27), - ] - TABLE_NAME = 'test_table' + def test_load_table_from_uri_then_dump_table(self): + TABLE_ID = 'test_table' + GS_URL = self._write_csv_to_storage( + 'bq_load_test' + unique_resource_id(), 'person_ages.csv', + HEADER_ROW, ROWS) - storage_client = StorageClient() + dataset = self.temp_dataset(_make_dataset_id('load_gcs_then_dump')) - # In the **very** rare case the bucket name is reserved, this - # fails with a ConnectionError. - bucket = storage_client.create_bucket(BUCKET_NAME) - self.to_delete.append(bucket) - - blob = bucket.blob(BLOB_NAME) - - with _NamedTemporaryFile() as temp: - with open(temp.name, 'w') as csv_write: - writer = csv.writer(csv_write) - writer.writerow(('Full Name', 'Age')) - writer.writerows(ROWS) + table_arg = Table(dataset.table(TABLE_ID), schema=SCHEMA) + table = retry_403(Config.CLIENT.create_table)(table_arg) + self.to_delete.insert(0, table) - with open(temp.name, 'rb') as csv_read: - blob.upload_from_file(csv_read, content_type='text/csv') + config = bigquery.LoadJobConfig() + config.create_disposition = 'CREATE_NEVER' + config.skip_leading_rows = 1 + config.source_format = 'CSV' + config.write_disposition = 'WRITE_EMPTY' + job = Config.CLIENT.load_table_from_uri( + GS_URL, dataset.table(TABLE_ID), job_config=config) - self.to_delete.insert(0, blob) + # Allow for 90 seconds of "warm up" before rows visible. 
See + # https://cloud.google.com/bigquery/streaming-data-into-bigquery#dataavailability + # 8 tries -> 1 + 2 + 4 + 8 + 16 + 32 + 64 = 127 seconds + retry = RetryInstanceState(_job_done, max_tries=8) + retry(job.reload)() - dataset = Config.CLIENT.dataset( - _make_dataset_name('load_gcs_then_dump')) + rows = self._fetch_single_page(table) + row_tuples = [r.values() for r in rows] + by_age = operator.itemgetter(1) + self.assertEqual(sorted(row_tuples, key=by_age), + sorted(ROWS, key=by_age)) - retry_403(dataset.create)() - self.to_delete.append(dataset) + def test_load_table_from_uri_w_autodetect_schema_then_get_job(self): + from google.cloud.bigquery import SchemaField + from google.cloud.bigquery.job import LoadJob - full_name = bigquery.SchemaField('full_name', 'STRING', - mode='REQUIRED') - age = bigquery.SchemaField('age', 'INTEGER', mode='REQUIRED') - table = dataset.table(TABLE_NAME, schema=[full_name, age]) - table.create() - self.to_delete.insert(0, table) + rows = ROWS * 100 + # BigQuery internally uses the first 100 rows to detect schema - job = Config.CLIENT.load_table_from_storage( - 'bq_load_storage_test_' + local_id, table, GS_URL) - job.create_disposition = 'CREATE_NEVER' - job.skip_leading_rows = 1 - job.source_format = 'CSV' - job.write_disposition = 'WRITE_EMPTY' + gs_url = self._write_csv_to_storage( + 'bq_load_test' + unique_resource_id(), 'person_ages.csv', + HEADER_ROW, rows) + dataset = self.temp_dataset(_make_dataset_id('load_gcs_then_dump')) + table_ref = dataset.table('test_table') + JOB_ID = 'load_table_w_autodetect_{}'.format(str(uuid.uuid4())) - job.begin() + config = bigquery.LoadJobConfig() + config.autodetect = True + job = Config.CLIENT.load_table_from_uri( + gs_url, table_ref, job_config=config, job_id=JOB_ID) # Allow for 90 seconds of "warm up" before rows visible. 
See # https://cloud.google.com/bigquery/streaming-data-into-bigquery#dataavailability @@ -494,27 +491,29 @@ def test_load_table_from_storage_then_dump_table(self): retry = RetryInstanceState(_job_done, max_tries=8) retry(job.reload)() - rows = self._fetch_single_page(table) + table = Config.CLIENT.get_table(table_ref) + self.to_delete.insert(0, table) + field_name = SchemaField( + u'Full_Name', u'string', u'NULLABLE', None, ()) + field_age = SchemaField(u'Age', u'integer', u'NULLABLE', None, ()) + self.assertEqual(table.schema, [field_name, field_age]) + + actual_rows = self._fetch_single_page(table) + actual_row_tuples = [r.values() for r in actual_rows] by_age = operator.itemgetter(1) - self.assertEqual(sorted(rows, key=by_age), - sorted(ROWS, key=by_age)) + self.assertEqual( + sorted(actual_row_tuples, key=by_age), sorted(rows, key=by_age)) + + fetched_job = Config.CLIENT.get_job(JOB_ID) - def test_load_table_from_storage_w_autodetect_schema(self): + self.assertIsInstance(fetched_job, LoadJob) + self.assertEqual(fetched_job.job_id, JOB_ID) + self.assertEqual(fetched_job.autodetect, True) + + def _write_csv_to_storage(self, bucket_name, blob_name, header_row, + data_rows): from google.cloud._testing import _NamedTemporaryFile from google.cloud.storage import Client as StorageClient - from google.cloud.bigquery import SchemaField - - local_id = unique_resource_id() - bucket_name = 'bq_load_test' + local_id - blob_name = 'person_ages.csv' - gs_url = 'gs://{}/{}'.format(bucket_name, blob_name) - rows = [ - ('Phred Phlyntstone', 32), - ('Bharney Rhubble', 33), - ('Wylma Phlyntstone', 29), - ('Bhettye Rhubble', 27), - ] * 100 # BigQuery internally uses the first 100 rows to detect schema - table_name = 'test_table' storage_client = StorageClient() @@ -528,66 +527,140 @@ def test_load_table_from_storage_w_autodetect_schema(self): with _NamedTemporaryFile() as temp: with open(temp.name, 'w') as csv_write: writer = csv.writer(csv_write) - writer.writerow(('Full Name', 'Age')) - writer.writerows(rows) + writer.writerow(header_row) + writer.writerows(data_rows) with open(temp.name, 'rb') as csv_read: blob.upload_from_file(csv_read, content_type='text/csv') self.to_delete.insert(0, blob) - dataset = Config.CLIENT.dataset( - _make_dataset_name('load_gcs_then_dump')) + return 'gs://{}/{}'.format(bucket_name, blob_name) - retry_403(dataset.create)() - self.to_delete.append(dataset) + def _load_table_for_extract_table( + self, storage_client, rows, bucket_name, blob_name, table): + from google.cloud._testing import _NamedTemporaryFile - table = dataset.table(table_name) - self.to_delete.insert(0, table) + gs_url = 'gs://{}/{}'.format(bucket_name, blob_name) - job = Config.CLIENT.load_table_from_storage( - 'bq_load_storage_test_' + local_id, table, gs_url) - job.autodetect = True + # In the **very** rare case the bucket name is reserved, this + # fails with a ConnectionError. 
+ bucket = storage_client.create_bucket(bucket_name) + self.to_delete.append(bucket) + blob = bucket.blob(blob_name) - job.begin() + with _NamedTemporaryFile() as temp: + with open(temp.name, 'w') as csv_write: + writer = csv.writer(csv_write) + writer.writerow(HEADER_ROW) + writer.writerows(rows) + + with open(temp.name, 'rb') as csv_read: + blob.upload_from_file(csv_read, content_type='text/csv') + self.to_delete.insert(0, blob) + dataset = self.temp_dataset(table.dataset_id) + table_ref = dataset.table(table.table_id) + config = bigquery.LoadJobConfig() + config.autodetect = True + job = Config.CLIENT.load_table_from_uri(gs_url, table_ref, + job_config=config) + # TODO(jba): do we need this retry now that we have job.result()? # Allow for 90 seconds of "warm up" before rows visible. See # https://cloud.google.com/bigquery/streaming-data-into-bigquery#dataavailability # 8 tries -> 1 + 2 + 4 + 8 + 16 + 32 + 64 = 127 seconds retry = RetryInstanceState(_job_done, max_tries=8) retry(job.reload)() - table.reload() - field_name = SchemaField( - u'Full_Name', u'string', u'NULLABLE', None, ()) - field_age = SchemaField(u'Age', u'integer', u'NULLABLE', None, ()) - self.assertEqual(table.schema, [field_name, field_age]) + def test_extract_table(self): + from google.cloud.storage import Client as StorageClient - actual_rows = self._fetch_single_page(table) - by_age = operator.itemgetter(1) - self.assertEqual( - sorted(actual_rows, key=by_age), sorted(rows, key=by_age)) + storage_client = StorageClient() + local_id = unique_resource_id() + bucket_name = 'bq_extract_test' + local_id + blob_name = 'person_ages.csv' + dataset_id = _make_dataset_id('load_gcs_then_extract') + table_id = 'test_table' + table_ref = Config.CLIENT.dataset(dataset_id).table(table_id) + table = Table(table_ref) + self.to_delete.insert(0, table) + self._load_table_for_extract_table( + storage_client, ROWS, bucket_name, blob_name, table_ref) + bucket = storage_client.bucket(bucket_name) + destination_blob_name = 'person_ages_out.csv' + destination = bucket.blob(destination_blob_name) + destination_uri = 'gs://{}/person_ages_out.csv'.format(bucket_name) + + job = Config.CLIENT.extract_table(table_ref, destination_uri) + job.result(timeout=100) + + self.to_delete.insert(0, destination) + got = destination.download_as_string().decode('utf-8') + self.assertIn('Bharney Rhubble', got) + + def test_extract_table_w_job_config(self): + from google.cloud.storage import Client as StorageClient + + storage_client = StorageClient() + local_id = unique_resource_id() + bucket_name = 'bq_extract_test' + local_id + blob_name = 'person_ages.csv' + dataset_id = _make_dataset_id('load_gcs_then_extract') + table_id = 'test_table' + table_ref = Config.CLIENT.dataset(dataset_id).table(table_id) + table = Table(table_ref) + self.to_delete.insert(0, table) + self._load_table_for_extract_table( + storage_client, ROWS, bucket_name, blob_name, table_ref) + bucket = storage_client.bucket(bucket_name) + destination_blob_name = 'person_ages_out.csv' + destination = bucket.blob(destination_blob_name) + destination_uri = 'gs://{}/person_ages_out.csv'.format(bucket_name) + + job_config = bigquery.ExtractJobConfig() + job_config.destination_format = 'NEWLINE_DELIMITED_JSON' + job = Config.CLIENT.extract_table( + table, destination_uri, job_config=job_config) + job.result() + + self.to_delete.insert(0, destination) + got = destination.download_as_string().decode('utf-8') + self.assertIn('"Bharney Rhubble"', got) + + def test_copy_table(self): + # If we create a 
new table to copy from, the test won't work + # because the new rows will be stored in the streaming buffer, + # and copy jobs don't read the streaming buffer. + # We could wait for the streaming buffer to empty, but that could + # take minutes. Instead we copy a small public table. + source_dataset = DatasetReference('bigquery-public-data', 'samples') + source_ref = source_dataset.table('shakespeare') + dest_dataset = self.temp_dataset(_make_dataset_id('copy_table')) + dest_ref = dest_dataset.table('destination_table') + job_config = bigquery.CopyJobConfig() + job = Config.CLIENT.copy_table( + source_ref, dest_ref, job_config=job_config) + job.result() + + dest_table = Config.CLIENT.get_table(dest_ref) + self.to_delete.insert(0, dest_table) + # Just check that we got some rows. + got_rows = self._fetch_single_page(dest_table) + self.assertTrue(len(got_rows) > 0) def test_job_cancel(self): - DATASET_NAME = _make_dataset_name('job_cancel') - JOB_NAME = 'fetch_' + DATASET_NAME + DATASET_ID = _make_dataset_id('job_cancel') + JOB_ID_PREFIX = 'fetch_' + DATASET_ID TABLE_NAME = 'test_table' - QUERY = 'SELECT * FROM %s.%s' % (DATASET_NAME, TABLE_NAME) - - dataset = Config.CLIENT.dataset(DATASET_NAME) + QUERY = 'SELECT * FROM %s.%s' % (DATASET_ID, TABLE_NAME) - retry_403(dataset.create)() - self.to_delete.append(dataset) + dataset = self.temp_dataset(DATASET_ID) - full_name = bigquery.SchemaField('full_name', 'STRING', - mode='REQUIRED') - age = bigquery.SchemaField('age', 'INTEGER', mode='REQUIRED') - table = dataset.table(TABLE_NAME, schema=[full_name, age]) - table.create() + table_arg = Table(dataset.table(TABLE_NAME), schema=SCHEMA) + table = retry_403(Config.CLIENT.create_table)(table_arg) self.to_delete.insert(0, table) - job = Config.CLIENT.run_async_query(JOB_NAME, QUERY) - job.begin() + job = Config.CLIENT.query(QUERY, job_id_prefix=JOB_ID_PREFIX) job.cancel() retry = RetryInstanceState(_job_done, max_tries=8) @@ -599,7 +672,7 @@ def test_job_cancel(self): # raise an error, and that the job completed (in the `retry()` # above). 
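# --- Illustrative sketch (not part of this diff) -------------------------
# Several tests above retry with RetryInstanceState(_job_done, max_tries=8)
# and note "8 tries -> 1 + 2 + 4 + 8 + 16 + 32 + 64 = 127 seconds".  That
# arithmetic assumes a delay that starts at one second and doubles between
# attempts; a stand-alone model of it (purely illustrative helper, not
# library code):

def _total_backoff_seconds(max_tries, base_delay=1):
    """Sum of sleeps for ``max_tries`` attempts with a doubling delay.

    ``max_tries`` attempts imply ``max_tries - 1`` sleeps, i.e.
    ``base_delay * (2**0 + 2**1 + ... + 2**(max_tries - 2))``.
    """
    return base_delay * (2 ** (max_tries - 1) - 1)

assert _total_backoff_seconds(8) == 127  # matches the comment above
# --------------------------------------------------------------------------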
- def test_sync_query_w_legacy_sql_types(self): + def test_query_rows_w_legacy_sql_types(self): naive = datetime.datetime(2016, 12, 5, 12, 41, 9) stamp = '%s %s' % (naive.date().isoformat(), naive.time().isoformat()) zoned = naive.replace(tzinfo=UTC) @@ -630,12 +703,13 @@ def test_sync_query_w_legacy_sql_types(self): }, ] for example in examples: - query = Config.CLIENT.run_sync_query(example['sql']) - query.use_legacy_sql = True - query.run() - self.assertEqual(len(query.rows), 1) - self.assertEqual(len(query.rows[0]), 1) - self.assertEqual(query.rows[0][0], example['expected']) + job_config = bigquery.QueryJobConfig() + job_config.use_legacy_sql = True + rows = list(Config.CLIENT.query_rows( + example['sql'], job_config=job_config)) + self.assertEqual(len(rows), 1) + self.assertEqual(len(rows[0]), 1) + self.assertEqual(rows[0][0], example['expected']) def _generate_standard_sql_types_examples(self): naive = datetime.datetime(2016, 12, 5, 12, 41, 9) @@ -731,15 +805,20 @@ def _generate_standard_sql_types_examples(self): }, ] - def test_sync_query_w_standard_sql_types(self): + def test_query_rows_w_standard_sql_types(self): examples = self._generate_standard_sql_types_examples() for example in examples: - query = Config.CLIENT.run_sync_query(example['sql']) - query.use_legacy_sql = False - query.run() - self.assertEqual(len(query.rows), 1) - self.assertEqual(len(query.rows[0]), 1) - self.assertEqual(query.rows[0][0], example['expected']) + rows = list(Config.CLIENT.query_rows(example['sql'])) + self.assertEqual(len(rows), 1) + self.assertEqual(len(rows[0]), 1) + self.assertEqual(rows[0][0], example['expected']) + + def test_query_rows_w_failed_query(self): + from google.api_core.exceptions import BadRequest + + with self.assertRaises(BadRequest): + Config.CLIENT.query_rows('invalid syntax;') + # TODO(swast): Ensure that job ID is surfaced in the exception. 
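# --- Illustrative sketch (not part of this diff) -------------------------
# The query tests above all follow one pattern: build a QueryJobConfig,
# optionally toggle use_legacy_sql, run the SQL through
# Config.CLIENT.query_rows(), and read back a single scalar.  A
# hypothetical helper condensing that pattern (the helper name and shape
# are assumptions, not library API):

from google.cloud import bigquery

def _fetch_scalar(client, sql, use_legacy_sql=False):
    """Run ``sql`` and return the first column of its only row."""
    job_config = bigquery.QueryJobConfig()
    job_config.use_legacy_sql = use_legacy_sql
    rows = list(client.query_rows(sql, job_config=job_config))
    assert len(rows) == 1 and len(rows[0]) == 1
    return rows[0][0]

# Usage in the same spirit as the tests above:
# _fetch_scalar(Config.CLIENT, 'SELECT 1') would return 1.
# --------------------------------------------------------------------------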
def test_dbapi_w_standard_sql_types(self): examples = self._generate_standard_sql_types_examples() @@ -760,19 +839,18 @@ def test_dbapi_fetchall(self): self.assertEqual(Config.CURSOR.rowcount, 3, "expected 3 rows") Config.CURSOR.arraysize = arraysize rows = Config.CURSOR.fetchall() - self.assertEqual(rows, [(1, 2), (3, 4), (5, 6)]) + row_tuples = [r.values() for r in rows] + self.assertEqual(row_tuples, [(1, 2), (3, 4), (5, 6)]) - def _load_table_for_dml(self, rows, dataset_name, table_name): + def _load_table_for_dml(self, rows, dataset_id, table_id): from google.cloud._testing import _NamedTemporaryFile - dataset = Config.CLIENT.dataset(dataset_name) - retry_403(dataset.create)() - self.to_delete.append(dataset) - + dataset = self.temp_dataset(dataset_id) greeting = bigquery.SchemaField( 'greeting', 'STRING', mode='NULLABLE') - table = dataset.table(table_name, schema=[greeting]) - table.create() + table_ref = dataset.table(table_id) + table_arg = Table(table_ref, schema=[greeting]) + table = retry_403(Config.CLIENT.create_table)(table_arg) self.to_delete.insert(0, table) with _NamedTemporaryFile() as temp: @@ -782,20 +860,20 @@ def _load_table_for_dml(self, rows, dataset_name, table_name): writer.writerows(rows) with open(temp.name, 'rb') as csv_read: - job = table.upload_from_file( - csv_read, - source_format='CSV', - skip_leading_rows=1, - create_disposition='CREATE_NEVER', - write_disposition='WRITE_EMPTY', - ) + config = bigquery.LoadJobConfig() + config.source_format = 'CSV' + config.skip_leading_rows = 1 + config.create_disposition = 'CREATE_NEVER' + config.write_disposition = 'WRITE_EMPTY' + job = Config.CLIENT.load_table_from_file( + csv_read, table_ref, job_config=config) # Retry until done. job.result(timeout=JOB_TIMEOUT) self._fetch_single_page(table) - def test_sync_query_w_dml(self): - dataset_name = _make_dataset_name('dml_tests') + def test_query_w_dml(self): + dataset_name = _make_dataset_id('dml_tests') table_name = 'test_table' self._load_table_for_dml([('Hello World',)], dataset_name, table_name) query_template = """UPDATE {}.{} @@ -803,15 +881,15 @@ def test_sync_query_w_dml(self): WHERE greeting = 'Hello World' """ - query = Config.CLIENT.run_sync_query( - query_template.format(dataset_name, table_name)) - query.use_legacy_sql = False - query.run() + query_job = Config.CLIENT.query( + query_template.format(dataset_name, table_name), + job_id_prefix='test_query_w_dml_') + query_job.result() - self.assertEqual(query.num_dml_affected_rows, 1) + self.assertEqual(query_job.num_dml_affected_rows, 1) def test_dbapi_w_dml(self): - dataset_name = _make_dataset_name('dml_tests') + dataset_name = _make_dataset_id('dml_tests') table_name = 'test_table' self._load_table_for_dml([('Hello World',)], dataset_name, table_name) query_template = """UPDATE {}.{} @@ -821,14 +899,15 @@ def test_dbapi_w_dml(self): Config.CURSOR.execute( query_template.format(dataset_name, table_name), - job_id='test_dbapi_w_dml_{}'.format(unique_resource_id())) + job_id='test_dbapi_w_dml_{}'.format(str(uuid.uuid4()))) self.assertEqual(Config.CURSOR.rowcount, 1) self.assertIsNone(Config.CURSOR.fetchone()) - def test_sync_query_w_query_params(self): - from google.cloud.bigquery._helpers import ArrayQueryParameter - from google.cloud.bigquery._helpers import ScalarQueryParameter - from google.cloud.bigquery._helpers import StructQueryParameter + def test_query_w_query_params(self): + from google.cloud.bigquery.job import QueryJobConfig + from google.cloud.bigquery.query import ArrayQueryParameter + from 
google.cloud.bigquery.query import ScalarQueryParameter + from google.cloud.bigquery.query import StructQueryParameter question = 'What is the answer to life, the universe, and everything?' question_param = ScalarQueryParameter( name='question', type_='STRING', value=question) @@ -986,14 +1065,16 @@ def test_sync_query_w_query_params(self): }, ] for example in examples: - query = Config.CLIENT.run_sync_query( + jconfig = QueryJobConfig() + jconfig.query_parameters = example['query_parameters'] + query_job = Config.CLIENT.query( example['sql'], - query_parameters=example['query_parameters']) - query.use_legacy_sql = False - query.run() - self.assertEqual(len(query.rows), 1) - self.assertEqual(len(query.rows[0]), 1) - self.assertEqual(query.rows[0][0], example['expected']) + job_config=jconfig, + job_id_prefix='test_query_w_query_params') + rows = list(query_job.result()) + self.assertEqual(len(rows), 1) + self.assertEqual(len(rows[0]), 1) + self.assertEqual(rows[0][0], example['expected']) def test_dbapi_w_query_parameters(self): examples = [ @@ -1080,6 +1161,14 @@ def test_dbapi_w_query_parameters(self): }, 'expected': datetime.datetime(2012, 3, 4, 5, 6, 0, tzinfo=UTC), }, + { + 'sql': 'SELECT TIMESTAMP_TRUNC(%(zoned)s, MINUTE)', + 'query_parameters': { + 'zoned': datetime.datetime( + 2012, 3, 4, 5, 6, 7, 250000, tzinfo=UTC), + }, + 'expected': datetime.datetime(2012, 3, 4, 5, 6, 0, tzinfo=UTC), + }, ] for example in examples: msg = 'sql: {} query_parameters: {}'.format( @@ -1096,41 +1185,95 @@ def test_dbapi_w_query_parameters(self): def test_dump_table_w_public_data(self): PUBLIC = 'bigquery-public-data' - DATASET_NAME = 'samples' + DATASET_ID = 'samples' TABLE_NAME = 'natality' - dataset = Config.CLIENT.dataset(DATASET_NAME, project=PUBLIC) - table = dataset.table(TABLE_NAME) - # Reload table to get the schema before fetching the rows. 
- table.reload() + table_ref = DatasetReference(PUBLIC, DATASET_ID).table(TABLE_NAME) + table = Config.CLIENT.get_table(table_ref) self._fetch_single_page(table) + def test_dump_table_w_public_data_selected_fields(self): + PUBLIC = 'bigquery-public-data' + DATASET_ID = 'samples' + TABLE_NAME = 'natality' + selected_fields = [ + bigquery.SchemaField('year', 'INTEGER', mode='NULLABLE'), + bigquery.SchemaField('month', 'INTEGER', mode='NULLABLE'), + bigquery.SchemaField('day', 'INTEGER', mode='NULLABLE'), + ] + table_ref = DatasetReference(PUBLIC, DATASET_ID).table(TABLE_NAME) + + rows = self._fetch_single_page( + table_ref, selected_fields=selected_fields) + + self.assertGreater(len(rows), 0) + self.assertEqual(len(rows[0]), 3) + def test_large_query_w_public_data(self): PUBLIC = 'bigquery-public-data' - DATASET_NAME = 'samples' + DATASET_ID = 'samples' TABLE_NAME = 'natality' LIMIT = 1000 SQL = 'SELECT * from `{}.{}.{}` LIMIT {}'.format( - PUBLIC, DATASET_NAME, TABLE_NAME, LIMIT) + PUBLIC, DATASET_ID, TABLE_NAME, LIMIT) - query = Config.CLIENT.run_sync_query(SQL) - query.use_legacy_sql = False - query.run() + iterator = Config.CLIENT.query_rows(SQL) - iterator = query.fetch_data(max_results=100) rows = list(iterator) self.assertEqual(len(rows), LIMIT) - def test_async_query_future(self): - query_job = Config.CLIENT.run_async_query( - str(uuid.uuid4()), 'SELECT 1') - query_job.use_legacy_sql = False - + def test_query_future(self): + query_job = Config.CLIENT.query('SELECT 1') iterator = query_job.result(timeout=JOB_TIMEOUT) - rows = list(iterator) - self.assertEqual(rows, [(1,)]) + row_tuples = [r.values() for r in iterator] + self.assertEqual(row_tuples, [(1,)]) + + def test_query_table_def(self): + gs_url = self._write_csv_to_storage( + 'bq_external_test' + unique_resource_id(), 'person_ages.csv', + HEADER_ROW, ROWS) + + job_config = bigquery.QueryJobConfig() + table_id = 'flintstones' + ec = bigquery.ExternalConfig('CSV') + ec.source_uris = [gs_url] + ec.schema = SCHEMA + ec.options.skip_leading_rows = 1 # skip the header row + job_config.table_definitions = {table_id: ec} + sql = 'SELECT * FROM %s' % table_id + + got_rows = Config.CLIENT.query_rows(sql, job_config=job_config) + + row_tuples = [r.values() for r in got_rows] + by_age = operator.itemgetter(1) + self.assertEqual(sorted(row_tuples, key=by_age), + sorted(ROWS, key=by_age)) + + def test_query_external_table(self): + gs_url = self._write_csv_to_storage( + 'bq_external_test' + unique_resource_id(), 'person_ages.csv', + HEADER_ROW, ROWS) + dataset_id = _make_dataset_id('query_external_table') + dataset = self.temp_dataset(dataset_id) + table_id = 'flintstones' + table_arg = Table(dataset.table(table_id), schema=SCHEMA) + ec = bigquery.ExternalConfig('CSV') + ec.source_uris = [gs_url] + ec.options.skip_leading_rows = 1 # skip the header row + table_arg.external_data_configuration = ec + table = Config.CLIENT.create_table(table_arg) + self.to_delete.insert(0, table) + + sql = 'SELECT * FROM %s.%s' % (dataset_id, table_id) + + got_rows = Config.CLIENT.query_rows(sql) + + row_tuples = [r.values() for r in got_rows] + by_age = operator.itemgetter(1) + self.assertEqual(sorted(row_tuples, key=by_age), + sorted(ROWS, key=by_age)) - def test_insert_nested_nested(self): + def test_create_rows_nested_nested(self): # See #2951 SF = bigquery.SchemaField schema = [ @@ -1151,65 +1294,89 @@ def test_insert_nested_nested(self): to_insert = [ ('Some value', record) ] - table_name = 'test_table' - dataset = Config.CLIENT.dataset( - 
_make_dataset_name('issue_2951')) - - retry_403(dataset.create)() - self.to_delete.append(dataset) - - table = dataset.table(table_name, schema=schema) - table.create() + table_id = 'test_table' + dataset = self.temp_dataset(_make_dataset_id('issue_2951')) + table_arg = Table(dataset.table(table_id), schema=schema) + table = retry_403(Config.CLIENT.create_table)(table_arg) self.to_delete.insert(0, table) - table.insert_data(to_insert) + Config.CLIENT.create_rows(table, to_insert) retry = RetryResult(_has_rows, max_tries=8) rows = retry(self._fetch_single_page)(table) + row_tuples = [r.values() for r in rows] + self.assertEqual(row_tuples, to_insert) - self.assertEqual(rows, to_insert) - - def test_create_table_insert_fetch_nested_schema(self): + def test_create_rows_nested_nested_dictionary(self): + # See #2951 + SF = bigquery.SchemaField + schema = [ + SF('string_col', 'STRING', mode='NULLABLE'), + SF('record_col', 'RECORD', mode='NULLABLE', fields=[ + SF('nested_string', 'STRING', mode='NULLABLE'), + SF('nested_repeated', 'INTEGER', mode='REPEATED'), + SF('nested_record', 'RECORD', mode='NULLABLE', fields=[ + SF('nested_nested_string', 'STRING', mode='NULLABLE'), + ]), + ]), + ] + record = { + 'nested_string': 'another string value', + 'nested_repeated': [0, 1, 2], + 'nested_record': {'nested_nested_string': 'some deep insight'}, + } + to_insert = [ + {'string_col': 'Some value', 'record_col': record} + ] + table_id = 'test_table' + dataset = self.temp_dataset(_make_dataset_id('issue_2951')) + table_arg = Table(dataset.table(table_id), schema=schema) + table = retry_403(Config.CLIENT.create_table)(table_arg) + self.to_delete.insert(0, table) - table_name = 'test_table' - dataset = Config.CLIENT.dataset( - _make_dataset_name('create_table_nested_schema')) - self.assertFalse(dataset.exists()) + Config.CLIENT.create_rows(table, to_insert) - retry_403(dataset.create)() - self.to_delete.append(dataset) + retry = RetryResult(_has_rows, max_tries=8) + rows = retry(self._fetch_single_page)(table) + row_tuples = [r.values() for r in rows] + expected_rows = [('Some value', record)] + self.assertEqual(row_tuples, expected_rows) + def test_create_table_rows_fetch_nested_schema(self): + table_name = 'test_table' + dataset = self.temp_dataset( + _make_dataset_id('create_table_nested_schema')) schema = _load_json_schema() - table = dataset.table(table_name, schema=schema) - table.create() + table_arg = Table(dataset.table(table_name), schema=schema) + table = retry_403(Config.CLIENT.create_table)(table_arg) self.to_delete.insert(0, table) - self.assertTrue(table.exists()) - self.assertEqual(table.name, table_name) + self.assertTrue(_table_exists(table)) + self.assertEqual(table.table_id, table_name) to_insert = [] # Data is in "JSON Lines" format, see http://jsonlines.org/ json_filename = os.path.join(WHERE, 'data', 'characters.jsonl') with open(json_filename) as rows_file: for line in rows_file: - mapping = json.loads(line) - to_insert.append( - tuple(mapping[field.name] for field in schema)) + to_insert.append(json.loads(line)) - errors = table.insert_data(to_insert) + errors = Config.CLIENT.create_rows_json(table, to_insert) self.assertEqual(len(errors), 0) retry = RetryResult(_has_rows, max_tries=8) fetched = retry(self._fetch_single_page)(table) + fetched_tuples = [f.values() for f in fetched] + self.assertEqual(len(fetched), len(to_insert)) - for found, expected in zip(sorted(fetched), sorted(to_insert)): - self.assertEqual(found[0], expected[0]) # Name - self.assertEqual(found[1], 
int(expected[1])) # Age - self.assertEqual(found[2], expected[2]) # Weight - self.assertEqual(found[3], expected[3]) # IsMagic + for found, expected in zip(sorted(fetched_tuples), to_insert): + self.assertEqual(found[0], expected['Name']) + self.assertEqual(found[1], int(expected['Age'])) + self.assertEqual(found[2], expected['Weight']) + self.assertEqual(found[3], expected['IsMagic']) - self.assertEqual(len(found[4]), len(expected[4])) # Spells - for f_spell, e_spell in zip(found[4], expected[4]): + self.assertEqual(len(found[4]), len(expected['Spells'])) + for f_spell, e_spell in zip(found[4], expected['Spells']): self.assertEqual(f_spell['Name'], e_spell['Name']) parts = time.strptime( e_spell['LastUsed'], '%Y-%m-%d %H:%M:%S UTC') @@ -1223,18 +1390,42 @@ def test_create_table_insert_fetch_nested_schema(self): e_spell['Icon'].encode('ascii')) self.assertEqual(f_spell['Icon'], e_icon) - parts = time.strptime(expected[5], '%H:%M:%S') + parts = time.strptime(expected['TeaTime'], '%H:%M:%S') e_teatime = datetime.time(*parts[3:6]) - self.assertEqual(found[5], e_teatime) # TeaTime + self.assertEqual(found[5], e_teatime) - parts = time.strptime(expected[6], '%Y-%m-%d') + parts = time.strptime(expected['NextVacation'], '%Y-%m-%d') e_nextvac = datetime.date(*parts[0:3]) - self.assertEqual(found[6], e_nextvac) # NextVacation + self.assertEqual(found[6], e_nextvac) - parts = time.strptime(expected[7], '%Y-%m-%dT%H:%M:%S') + parts = time.strptime(expected['FavoriteTime'], + '%Y-%m-%dT%H:%M:%S') e_favtime = datetime.datetime(*parts[0:6]) - self.assertEqual(found[7], e_favtime) # FavoriteTime + self.assertEqual(found[7], e_favtime) + + def temp_dataset(self, dataset_id): + dataset = retry_403(Config.CLIENT.create_dataset)( + Dataset(Config.CLIENT.dataset(dataset_id))) + self.to_delete.append(dataset) + return dataset def _job_done(instance): return instance.state.lower() == 'done' + + +def _dataset_exists(ds): + try: + Config.CLIENT.get_dataset(DatasetReference(ds.project, ds.dataset_id)) + return True + except NotFound: + return False + + +def _table_exists(t): + try: + tr = DatasetReference(t.project, t.dataset_id).table(t.table_id) + Config.CLIENT.get_table(tr) + return True + except NotFound: + return False diff --git a/bigquery/tests/unit/test__helpers.py b/bigquery/tests/unit/test__helpers.py index 9dc14f6e3a47..15a62107c645 100644 --- a/bigquery/tests/unit/test__helpers.py +++ b/bigquery/tests/unit/test__helpers.py @@ -16,8 +16,6 @@ import datetime import unittest -import mock - class Test_not_null(unittest.TestCase): @@ -158,6 +156,63 @@ def test_w_base64_encoded_text(self): self.assertEqual(coerced, expected) +class Test_timestamp_query_param_from_json(unittest.TestCase): + + def _call_fut(self, value, field): + from google.cloud.bigquery import _helpers + + return _helpers._timestamp_query_param_from_json(value, field) + + def test_w_none_nullable(self): + self.assertIsNone(self._call_fut(None, _Field('NULLABLE'))) + + def test_w_timestamp_valid(self): + from google.cloud._helpers import UTC + + samples = [ + ( + '2016-12-20 15:58:27.339328+00:00', + datetime.datetime(2016, 12, 20, 15, 58, 27, 339328, tzinfo=UTC) + ), + ( + '2016-12-20 15:58:27+00:00', + datetime.datetime(2016, 12, 20, 15, 58, 27, tzinfo=UTC) + ), + ( + '2016-12-20T15:58:27.339328+00:00', + datetime.datetime(2016, 12, 20, 15, 58, 27, 339328, tzinfo=UTC) + ), + ( + '2016-12-20T15:58:27+00:00', + datetime.datetime(2016, 12, 20, 15, 58, 27, tzinfo=UTC) + ), + ( + '2016-12-20 15:58:27.339328Z', + datetime.datetime(2016, 12, 20, 
15, 58, 27, 339328, tzinfo=UTC) + ), + ( + '2016-12-20 15:58:27Z', + datetime.datetime(2016, 12, 20, 15, 58, 27, tzinfo=UTC) + ), + ( + '2016-12-20T15:58:27.339328Z', + datetime.datetime(2016, 12, 20, 15, 58, 27, 339328, tzinfo=UTC) + ), + ( + '2016-12-20T15:58:27Z', + datetime.datetime(2016, 12, 20, 15, 58, 27, tzinfo=UTC) + ), + ] + for timestamp_str, expected_result in samples: + self.assertEqual( + self._call_fut(timestamp_str, _Field('NULLABLE')), + expected_result) + + def test_w_timestamp_invalid(self): + with self.assertRaises(ValueError): + self._call_fut('definitely-not-a-timestamp', _Field('NULLABLE')) + + class Test_timestamp_from_json(unittest.TestCase): def _call_fut(self, value, field): @@ -320,12 +375,12 @@ def test_w_record_subfield(self): self.assertEqual(coerced, expected) -class Test_row_from_json(unittest.TestCase): +class Test_row_tuple_from_json(unittest.TestCase): def _call_fut(self, row, schema): - from google.cloud.bigquery._helpers import _row_from_json + from google.cloud.bigquery._helpers import _row_tuple_from_json - return _row_from_json(row, schema) + return _row_tuple_from_json(row, schema) def test_w_single_scalar_column(self): # SELECT 1 AS col @@ -411,15 +466,36 @@ def test_w_array_of_struct_w_array(self): {u'first': [5, 6], u'second': 7}, ],)) + def test_row(self): + from google.cloud.bigquery._helpers import Row + + VALUES = (1, 2, 3) + r = Row(VALUES, {'a': 0, 'b': 1, 'c': 2}) + self.assertEqual(r.a, 1) + self.assertEqual(r[1], 2) + self.assertEqual(r['c'], 3) + self.assertEqual(len(r), 3) + self.assertEqual(r.values(), VALUES) + self.assertEqual(repr(r), + "Row((1, 2, 3), {'a': 0, 'b': 1, 'c': 2})") + self.assertFalse(r != r) + self.assertFalse(r == 3) + with self.assertRaises(AttributeError): + r.z + with self.assertRaises(KeyError): + r['z'] + class Test_rows_from_json(unittest.TestCase): - def _call_fut(self, value, field): + def _call_fut(self, rows, schema): from google.cloud.bigquery._helpers import _rows_from_json - return _rows_from_json(value, field) + return _rows_from_json(rows, schema) def test_w_record_subfield(self): + from google.cloud.bigquery._helpers import Row + full_name = _Field('REQUIRED', 'full_name', 'STRING') area_code = _Field('REQUIRED', 'area_code', 'STRING') local_number = _Field('REQUIRED', 'local_number', 'STRING') @@ -455,15 +531,18 @@ def test_w_record_subfield(self): 'local_number': '768-5309', 'rank': 2, } + f2i = {'full_name': 0, 'phone': 1, 'color': 2} expected = [ - ('Phred Phlyntstone', phred_phone, ['orange', 'black']), - ('Bharney Rhubble', bharney_phone, ['brown']), - ('Wylma Phlyntstone', None, []), + Row(('Phred Phlyntstone', phred_phone, ['orange', 'black']), f2i), + Row(('Bharney Rhubble', bharney_phone, ['brown']), f2i), + Row(('Wylma Phlyntstone', None, []), f2i), ] coerced = self._call_fut(rows, schema) self.assertEqual(coerced, expected) def test_w_int64_float64_bool(self): + from google.cloud.bigquery._helpers import Row + # "Standard" SQL dialect uses 'INT64', 'FLOAT64', 'BOOL'. 
candidate = _Field('REQUIRED', 'candidate', 'STRING') votes = _Field('REQUIRED', 'votes', 'INT64') @@ -490,10 +569,11 @@ def test_w_int64_float64_bool(self): {'v': 'false'}, ]}, ] + f2i = {'candidate': 0, 'votes': 1, 'percentage': 2, 'incumbent': 3} expected = [ - ('Phred Phlyntstone', 8, 0.25, True), - ('Bharney Rhubble', 4, 0.125, False), - ('Wylma Phlyntstone', 20, 0.625, False), + Row(('Phred Phlyntstone', 8, 0.25, True), f2i), + Row(('Bharney Rhubble', 4, 0.125, False), f2i), + Row(('Wylma Phlyntstone', 20, 0.625, False), f2i), ] coerced = self._call_fut(rows, schema) self.assertEqual(coerced, expected) @@ -669,64 +749,40 @@ def test_w_datetime(self): self.assertEqual(self._call_fut(when), '12:13:41') -class Test_ConfigurationProperty(unittest.TestCase): - - @staticmethod - def _get_target_class(): - from google.cloud.bigquery._helpers import _ConfigurationProperty - - return _ConfigurationProperty - - def _make_one(self, *args, **kw): - return self._get_target_class()(*args, **kw) - - def test_it(self): - - class Configuration(object): - _attr = None +class Test_snake_to_camel_case(unittest.TestCase): - class Wrapper(object): - attr = self._make_one('attr') - - def __init__(self): - self._configuration = Configuration() - - self.assertEqual(Wrapper.attr.name, 'attr') + def _call_fut(self, value): + from google.cloud.bigquery._helpers import _snake_to_camel_case - wrapper = Wrapper() - self.assertIsNone(wrapper.attr) + return _snake_to_camel_case(value) - value = object() - wrapper.attr = value - self.assertIs(wrapper.attr, value) - self.assertIs(wrapper._configuration._attr, value) + def test_w_snake_case_string(self): + self.assertEqual(self._call_fut('friendly_name'), 'friendlyName') - del wrapper.attr - self.assertIsNone(wrapper.attr) - self.assertIsNone(wrapper._configuration._attr) + def test_w_camel_case_string(self): + self.assertEqual(self._call_fut('friendlyName'), 'friendlyName') -class Test_TypedProperty(unittest.TestCase): +class Test_TypedApiResourceProperty(unittest.TestCase): @staticmethod def _get_target_class(): - from google.cloud.bigquery._helpers import _TypedProperty + from google.cloud.bigquery._helpers import _TypedApiResourceProperty - return _TypedProperty + return _TypedApiResourceProperty def _make_one(self, *args, **kw): return self._get_target_class()(*args, **kw) def test_it(self): - class Configuration(object): - _attr = None - class Wrapper(object): - attr = self._make_one('attr', int) + attr = self._make_one('attr', 'back', int) def __init__(self): - self._configuration = Configuration() + self._properties = {} + + self.assertIsNotNone(Wrapper.attr) wrapper = Wrapper() with self.assertRaises(ValueError): @@ -734,1037 +790,43 @@ def __init__(self): wrapper.attr = 42 self.assertEqual(wrapper.attr, 42) - self.assertEqual(wrapper._configuration._attr, 42) + self.assertEqual(wrapper._properties['back'], 42) - del wrapper.attr + wrapper.attr = None self.assertIsNone(wrapper.attr) - self.assertIsNone(wrapper._configuration._attr) - + self.assertIsNone(wrapper._properties['back']) -class Test_EnumProperty(unittest.TestCase): - - @staticmethod - def _get_target_class(): - from google.cloud.bigquery._helpers import _EnumProperty - - return _EnumProperty - - def test_it(self): - - class Sub(self._get_target_class()): - pass - - class Configuration(object): - _attr = None - - class Wrapper(object): - attr = Sub('attr') - - def __init__(self): - self._configuration = Configuration() - - wrapper = Wrapper() - wrapper.attr = 'FOO' - self.assertEqual(wrapper.attr, 
'FOO') - self.assertEqual(wrapper._configuration._attr, 'FOO') + wrapper.attr = 23 + self.assertEqual(wrapper.attr, 23) + self.assertEqual(wrapper._properties['back'], 23) del wrapper.attr self.assertIsNone(wrapper.attr) - self.assertIsNone(wrapper._configuration._attr) + with self.assertRaises(KeyError): + wrapper._properties['back'] -class Test_UDFResourcesProperty(unittest.TestCase): +class Test_ListApiResourceProperty(unittest.TestCase): @staticmethod def _get_target_class(): - from google.cloud.bigquery._helpers import UDFResourcesProperty + from google.cloud.bigquery._helpers import _ListApiResourceProperty - return UDFResourcesProperty + return _ListApiResourceProperty def _make_one(self, *args, **kw): return self._get_target_class()(*args, **kw) def _descriptor_and_klass(self): - descriptor = self._make_one() - - class _Test(object): - _udf_resources = () - udf_resources = descriptor - - return descriptor, _Test - - def test_class_getter(self): - descriptor, klass = self._descriptor_and_klass() - self.assertIs(klass.udf_resources, descriptor) - - def test_instance_getter_empty(self): - _, klass = self._descriptor_and_klass() - instance = klass() - self.assertEqual(instance.udf_resources, []) - - def test_resource_equality(self): - from google.cloud.bigquery._helpers import UDFResource - - resource1a = UDFResource('resourceUri', 'gs://bucket/file.js') - resource1b = UDFResource('resourceUri', 'gs://bucket/file.js') - resource2 = UDFResource('resourceUri', 'gs://bucket/other.js') - - self.assertEqual(resource1a, resource1b) - self.assertNotEqual(resource1a, resource2) - self.assertNotEqual(resource1a, object()) - self.assertEqual(resource1a, mock.ANY) - - def test_instance_getter_w_non_empty_list(self): - from google.cloud.bigquery._helpers import UDFResource - - RESOURCE_URI = 'gs://some-bucket/js/lib.js' - udf_resources = [UDFResource("resourceUri", RESOURCE_URI)] - _, klass = self._descriptor_and_klass() - instance = klass() - instance._udf_resources = tuple(udf_resources) - - self.assertEqual(instance.udf_resources, udf_resources) - - def test_instance_setter_w_empty_list(self): - from google.cloud.bigquery._helpers import UDFResource - - RESOURCE_URI = 'gs://some-bucket/js/lib.js' - udf_resources = [UDFResource("resourceUri", RESOURCE_URI)] - _, klass = self._descriptor_and_klass() - instance = klass() - instance._udf_resources = udf_resources - - instance.udf_resources = [] - - self.assertEqual(instance.udf_resources, []) - - def test_instance_setter_w_valid_udf(self): - from google.cloud.bigquery._helpers import UDFResource - - RESOURCE_URI = 'gs://some-bucket/js/lib.js' - udf_resources = [UDFResource("resourceUri", RESOURCE_URI)] - _, klass = self._descriptor_and_klass() - instance = klass() - - instance.udf_resources = udf_resources - - self.assertEqual(instance.udf_resources, udf_resources) - - def test_instance_setter_w_bad_udfs(self): - _, klass = self._descriptor_and_klass() - instance = klass() - - with self.assertRaises(ValueError): - instance.udf_resources = ["foo"] - - self.assertEqual(instance.udf_resources, []) - - -class Test_AbstractQueryParameter(unittest.TestCase): - - @staticmethod - def _get_target_class(): - from google.cloud.bigquery._helpers import AbstractQueryParameter + from google.cloud.bigquery.query import _AbstractQueryParameter - return AbstractQueryParameter - - def _make_one(self, *args, **kw): - return self._get_target_class()(*args, **kw) - - def test_from_api_virtual(self): - klass = self._get_target_class() - with 
self.assertRaises(NotImplementedError): - klass.from_api_repr({}) - - def test_to_api_virtual(self): - param = self._make_one() - with self.assertRaises(NotImplementedError): - param.to_api_repr() - - -class Test_ScalarQueryParameter(unittest.TestCase): - - @staticmethod - def _get_target_class(): - from google.cloud.bigquery._helpers import ScalarQueryParameter - - return ScalarQueryParameter - - def _make_one(self, *args, **kw): - return self._get_target_class()(*args, **kw) - - def test_ctor(self): - param = self._make_one(name='foo', type_='INT64', value=123) - self.assertEqual(param.name, 'foo') - self.assertEqual(param.type_, 'INT64') - self.assertEqual(param.value, 123) - - def test_positional(self): - klass = self._get_target_class() - param = klass.positional(type_='INT64', value=123) - self.assertEqual(param.name, None) - self.assertEqual(param.type_, 'INT64') - self.assertEqual(param.value, 123) - - def test_from_api_repr_w_name(self): - RESOURCE = { - 'name': 'foo', - 'parameterType': { - 'type': 'INT64', - }, - 'parameterValue': { - 'value': 123, - }, - } - klass = self._get_target_class() - param = klass.from_api_repr(RESOURCE) - self.assertEqual(param.name, 'foo') - self.assertEqual(param.type_, 'INT64') - self.assertEqual(param.value, 123) - - def test_from_api_repr_wo_name(self): - RESOURCE = { - 'parameterType': { - 'type': 'INT64', - }, - 'parameterValue': { - 'value': '123', - }, - } - klass = self._get_target_class() - param = klass.from_api_repr(RESOURCE) - self.assertEqual(param.name, None) - self.assertEqual(param.type_, 'INT64') - self.assertEqual(param.value, 123) - - def test_to_api_repr_w_name(self): - EXPECTED = { - 'name': 'foo', - 'parameterType': { - 'type': 'INT64', - }, - 'parameterValue': { - 'value': '123', - }, - } - param = self._make_one(name='foo', type_='INT64', value=123) - self.assertEqual(param.to_api_repr(), EXPECTED) - - def test_to_api_repr_wo_name(self): - EXPECTED = { - 'parameterType': { - 'type': 'INT64', - }, - 'parameterValue': { - 'value': '123', - }, - } - klass = self._get_target_class() - param = klass.positional(type_='INT64', value=123) - self.assertEqual(param.to_api_repr(), EXPECTED) - - def test_to_api_repr_w_float(self): - EXPECTED = { - 'parameterType': { - 'type': 'FLOAT64', - }, - 'parameterValue': { - 'value': 12.345, - }, - } - klass = self._get_target_class() - param = klass.positional(type_='FLOAT64', value=12.345) - self.assertEqual(param.to_api_repr(), EXPECTED) - - def test_to_api_repr_w_bool(self): - EXPECTED = { - 'parameterType': { - 'type': 'BOOL', - }, - 'parameterValue': { - 'value': 'false', - }, - } - klass = self._get_target_class() - param = klass.positional(type_='BOOL', value=False) - self.assertEqual(param.to_api_repr(), EXPECTED) - - def test_to_api_repr_w_timestamp_datetime(self): - from google.cloud._helpers import UTC - - STAMP = '2016-12-20 15:58:27.339328+00:00' - when = datetime.datetime(2016, 12, 20, 15, 58, 27, 339328, tzinfo=UTC) - EXPECTED = { - 'parameterType': { - 'type': 'TIMESTAMP', - }, - 'parameterValue': { - 'value': STAMP, - }, - } - klass = self._get_target_class() - param = klass.positional(type_='TIMESTAMP', value=when) - self.assertEqual(param.to_api_repr(), EXPECTED) - - def test_to_api_repr_w_timestamp_micros(self): - from google.cloud._helpers import _microseconds_from_datetime - - now = datetime.datetime.utcnow() - seconds = _microseconds_from_datetime(now) / 1.0e6 - EXPECTED = { - 'parameterType': { - 'type': 'TIMESTAMP', - }, - 'parameterValue': { - 'value': seconds, - }, - } 
- klass = self._get_target_class() - param = klass.positional(type_='TIMESTAMP', value=seconds) - self.assertEqual(param.to_api_repr(), EXPECTED) - - def test_to_api_repr_w_datetime_datetime(self): - from google.cloud._helpers import _datetime_to_rfc3339 - - now = datetime.datetime.utcnow() - EXPECTED = { - 'parameterType': { - 'type': 'DATETIME', - }, - 'parameterValue': { - 'value': _datetime_to_rfc3339(now)[:-1], # strip trailing 'Z' - }, - } - klass = self._get_target_class() - param = klass.positional(type_='DATETIME', value=now) - self.assertEqual(param.to_api_repr(), EXPECTED) - - def test_to_api_repr_w_datetime_string(self): - from google.cloud._helpers import _datetime_to_rfc3339 - - now = datetime.datetime.utcnow() - now_str = _datetime_to_rfc3339(now) - EXPECTED = { - 'parameterType': { - 'type': 'DATETIME', - }, - 'parameterValue': { - 'value': now_str, - }, - } - klass = self._get_target_class() - param = klass.positional(type_='DATETIME', value=now_str) - self.assertEqual(param.to_api_repr(), EXPECTED) - - def test_to_api_repr_w_date_date(self): - today = datetime.date.today() - EXPECTED = { - 'parameterType': { - 'type': 'DATE', - }, - 'parameterValue': { - 'value': today.isoformat(), - }, - } - klass = self._get_target_class() - param = klass.positional(type_='DATE', value=today) - self.assertEqual(param.to_api_repr(), EXPECTED) - - def test_to_api_repr_w_date_string(self): - today = datetime.date.today() - today_str = today.isoformat(), - EXPECTED = { - 'parameterType': { - 'type': 'DATE', - }, - 'parameterValue': { - 'value': today_str, - }, - } - klass = self._get_target_class() - param = klass.positional(type_='DATE', value=today_str) - self.assertEqual(param.to_api_repr(), EXPECTED) - - def test_to_api_repr_w_unknown_type(self): - EXPECTED = { - 'parameterType': { - 'type': 'UNKNOWN', - }, - 'parameterValue': { - 'value': 'unknown', - }, - } - klass = self._get_target_class() - param = klass.positional(type_='UNKNOWN', value='unknown') - self.assertEqual(param.to_api_repr(), EXPECTED) - - def test___eq___wrong_type(self): - field = self._make_one('test', 'STRING', 'value') - other = object() - self.assertNotEqual(field, other) - self.assertEqual(field, mock.ANY) - - def test___eq___name_mismatch(self): - field = self._make_one('test', 'STRING', 'value') - other = self._make_one('other', 'STRING', 'value') - self.assertNotEqual(field, other) - - def test___eq___field_type_mismatch(self): - field = self._make_one('test', 'STRING', None) - other = self._make_one('test', 'INT64', None) - self.assertNotEqual(field, other) - - def test___eq___value_mismatch(self): - field = self._make_one('test', 'STRING', 'hello') - other = self._make_one('test', 'STRING', 'world') - self.assertNotEqual(field, other) - - def test___eq___hit(self): - field = self._make_one('test', 'STRING', 'gotcha') - other = self._make_one('test', 'STRING', 'gotcha') - self.assertEqual(field, other) - - def test___ne___wrong_type(self): - field = self._make_one('toast', 'INT64', 13) - other = object() - self.assertNotEqual(field, other) - self.assertEqual(field, mock.ANY) - - def test___ne___same_value(self): - field1 = self._make_one('test', 'INT64', 12) - field2 = self._make_one('test', 'INT64', 12) - # unittest ``assertEqual`` uses ``==`` not ``!=``. 
- comparison_val = (field1 != field2) - self.assertFalse(comparison_val) - - def test___ne___different_values(self): - field1 = self._make_one('test', 'INT64', 11) - field2 = self._make_one('test', 'INT64', 12) - self.assertNotEqual(field1, field2) - - def test___repr__(self): - field1 = self._make_one('field1', 'STRING', 'value') - expected = "ScalarQueryParameter('field1', 'STRING', 'value')" - self.assertEqual(repr(field1), expected) - - -def _make_subparam(name, type_, value): - from google.cloud.bigquery._helpers import ScalarQueryParameter - - return ScalarQueryParameter(name, type_, value) - - -class Test_ArrayQueryParameter(unittest.TestCase): - - @staticmethod - def _get_target_class(): - from google.cloud.bigquery._helpers import ArrayQueryParameter - - return ArrayQueryParameter - - def _make_one(self, *args, **kw): - return self._get_target_class()(*args, **kw) - - def test_ctor(self): - param = self._make_one(name='foo', array_type='INT64', values=[1, 2]) - self.assertEqual(param.name, 'foo') - self.assertEqual(param.array_type, 'INT64') - self.assertEqual(param.values, [1, 2]) - - def test_positional(self): - klass = self._get_target_class() - param = klass.positional(array_type='INT64', values=[1, 2]) - self.assertEqual(param.name, None) - self.assertEqual(param.array_type, 'INT64') - self.assertEqual(param.values, [1, 2]) - - def test_from_api_repr_w_name(self): - RESOURCE = { - 'name': 'foo', - 'parameterType': { - 'type': 'ARRAY', - 'arrayType': { - 'type': 'INT64', - }, - }, - 'parameterValue': { - 'arrayValues': [ - { - 'value': '1', - }, - { - 'value': '2' - }, - ], - }, - } - klass = self._get_target_class() - param = klass.from_api_repr(RESOURCE) - self.assertEqual(param.name, 'foo') - self.assertEqual(param.array_type, 'INT64') - self.assertEqual(param.values, [1, 2]) - - def test_from_api_repr_wo_name(self): - RESOURCE = { - 'parameterType': { - 'type': 'ARRAY', - 'arrayType': { - 'type': 'INT64', - }, - }, - 'parameterValue': { - 'arrayValues': [ - { - 'value': '1', - }, - { - 'value': '2' - }, - ], - }, - } - klass = self._get_target_class() - param = klass.from_api_repr(RESOURCE) - self.assertEqual(param.name, None) - self.assertEqual(param.array_type, 'INT64') - self.assertEqual(param.values, [1, 2]) - - def test_from_api_repr_w_struct_type(self): - from google.cloud.bigquery._helpers import StructQueryParameter - - RESOURCE = { - 'parameterType': { - 'type': 'ARRAY', - 'arrayType': { - 'type': 'STRUCT', - 'structTypes': [ - { - 'name': 'name', - 'type': {'type': 'STRING'}, - }, - { - 'name': 'age', - 'type': {'type': 'INT64'}, - }, - ], - }, - }, - 'parameterValue': { - 'arrayValues': [ - { - 'structValues': { - 'name': {'value': 'Phred Phlyntstone'}, - 'age': {'value': '32'}, - }, - }, - { - 'structValues': { - 'name': { - 'value': 'Bharney Rhubbyl', - }, - 'age': {'value': '31'}, - }, - }, - ], - }, - } - - klass = self._get_target_class() - param = klass.from_api_repr(RESOURCE) - - phred = StructQueryParameter.positional( - _make_subparam('name', 'STRING', 'Phred Phlyntstone'), - _make_subparam('age', 'INT64', 32)) - bharney = StructQueryParameter.positional( - _make_subparam('name', 'STRING', 'Bharney Rhubbyl'), - _make_subparam('age', 'INT64', 31)) - self.assertEqual(param.array_type, 'STRUCT') - self.assertEqual(param.values, [phred, bharney]) - - def test_to_api_repr_w_name(self): - EXPECTED = { - 'name': 'foo', - 'parameterType': { - 'type': 'ARRAY', - 'arrayType': { - 'type': 'INT64', - }, - }, - 'parameterValue': { - 'arrayValues': [ - { - 'value': 
'1', - }, - { - 'value': '2' - }, - ], - }, - } - param = self._make_one(name='foo', array_type='INT64', values=[1, 2]) - self.assertEqual(param.to_api_repr(), EXPECTED) - - def test_to_api_repr_wo_name(self): - EXPECTED = { - 'parameterType': { - 'type': 'ARRAY', - 'arrayType': { - 'type': 'INT64', - }, - }, - 'parameterValue': { - 'arrayValues': [ - { - 'value': '1', - }, - { - 'value': '2' - }, - ], - }, - } - klass = self._get_target_class() - param = klass.positional(array_type='INT64', values=[1, 2]) - self.assertEqual(param.to_api_repr(), EXPECTED) - - def test_to_api_repr_w_unknown_type(self): - EXPECTED = { - 'parameterType': { - 'type': 'ARRAY', - 'arrayType': { - 'type': 'UNKNOWN', - }, - }, - 'parameterValue': { - 'arrayValues': [ - { - 'value': 'unknown', - } - ], - }, - } - klass = self._get_target_class() - param = klass.positional(array_type='UNKNOWN', values=['unknown']) - self.assertEqual(param.to_api_repr(), EXPECTED) - - def test_to_api_repr_w_record_type(self): - from google.cloud.bigquery._helpers import StructQueryParameter - - EXPECTED = { - 'parameterType': { - 'type': 'ARRAY', - 'arrayType': { - 'type': 'STRUCT', - 'structTypes': [ - {'name': 'foo', 'type': {'type': 'STRING'}}, - {'name': 'bar', 'type': {'type': 'INT64'}}, - ], - }, - }, - 'parameterValue': { - 'arrayValues': [{ - 'structValues': { - 'foo': {'value': 'Foo'}, - 'bar': {'value': '123'}, - } - }] - }, - } - one = _make_subparam('foo', 'STRING', 'Foo') - another = _make_subparam('bar', 'INT64', 123) - struct = StructQueryParameter.positional(one, another) - klass = self._get_target_class() - param = klass.positional(array_type='RECORD', values=[struct]) - self.assertEqual(param.to_api_repr(), EXPECTED) - - def test___eq___wrong_type(self): - field = self._make_one('test', 'STRING', ['value']) - other = object() - self.assertNotEqual(field, other) - self.assertEqual(field, mock.ANY) - - def test___eq___name_mismatch(self): - field = self._make_one('field', 'STRING', ['value']) - other = self._make_one('other', 'STRING', ['value']) - self.assertNotEqual(field, other) - - def test___eq___field_type_mismatch(self): - field = self._make_one('test', 'STRING', []) - other = self._make_one('test', 'INT64', []) - self.assertNotEqual(field, other) - - def test___eq___value_mismatch(self): - field = self._make_one('test', 'STRING', ['hello']) - other = self._make_one('test', 'STRING', ['hello', 'world']) - self.assertNotEqual(field, other) - - def test___eq___hit(self): - field = self._make_one('test', 'STRING', ['gotcha']) - other = self._make_one('test', 'STRING', ['gotcha']) - self.assertEqual(field, other) - - def test___ne___wrong_type(self): - field = self._make_one('toast', 'INT64', [13]) - other = object() - self.assertNotEqual(field, other) - self.assertEqual(field, mock.ANY) - - def test___ne___same_value(self): - field1 = self._make_one('test', 'INT64', [12]) - field2 = self._make_one('test', 'INT64', [12]) - # unittest ``assertEqual`` uses ``==`` not ``!=``. 
- comparison_val = (field1 != field2) - self.assertFalse(comparison_val) - - def test___ne___different_values(self): - field1 = self._make_one('test', 'INT64', [11]) - field2 = self._make_one('test', 'INT64', [12]) - self.assertNotEqual(field1, field2) - - def test___repr__(self): - field1 = self._make_one('field1', 'STRING', ['value']) - expected = "ArrayQueryParameter('field1', 'STRING', ['value'])" - self.assertEqual(repr(field1), expected) - - -class Test_StructQueryParameter(unittest.TestCase): - - @staticmethod - def _get_target_class(): - from google.cloud.bigquery._helpers import StructQueryParameter - - return StructQueryParameter - - def _make_one(self, *args, **kw): - return self._get_target_class()(*args, **kw) - - def test_ctor(self): - sub_1 = _make_subparam('bar', 'INT64', 123) - sub_2 = _make_subparam('baz', 'STRING', 'abc') - param = self._make_one('foo', sub_1, sub_2) - self.assertEqual(param.name, 'foo') - self.assertEqual(param.struct_types, {'bar': 'INT64', 'baz': 'STRING'}) - self.assertEqual(param.struct_values, {'bar': 123, 'baz': 'abc'}) - - def test_positional(self): - sub_1 = _make_subparam('bar', 'INT64', 123) - sub_2 = _make_subparam('baz', 'STRING', 'abc') - klass = self._get_target_class() - param = klass.positional(sub_1, sub_2) - self.assertEqual(param.name, None) - self.assertEqual(param.struct_types, {'bar': 'INT64', 'baz': 'STRING'}) - self.assertEqual(param.struct_values, {'bar': 123, 'baz': 'abc'}) - - def test_from_api_repr_w_name(self): - RESOURCE = { - 'name': 'foo', - 'parameterType': { - 'type': 'STRUCT', - 'structTypes': [ - {'name': 'bar', 'type': {'type': 'INT64'}}, - {'name': 'baz', 'type': {'type': 'STRING'}}, - ], - }, - 'parameterValue': { - 'structValues': { - 'bar': {'value': 123}, - 'baz': {'value': 'abc'}, - }, - }, - } - klass = self._get_target_class() - param = klass.from_api_repr(RESOURCE) - self.assertEqual(param.name, 'foo') - self.assertEqual(param.struct_types, {'bar': 'INT64', 'baz': 'STRING'}) - self.assertEqual(param.struct_values, {'bar': 123, 'baz': 'abc'}) - - def test_from_api_repr_wo_name(self): - RESOURCE = { - 'parameterType': { - 'type': 'STRUCT', - 'structTypes': [ - {'name': 'bar', 'type': {'type': 'INT64'}}, - {'name': 'baz', 'type': {'type': 'STRING'}}, - ], - }, - 'parameterValue': { - 'structValues': { - 'bar': {'value': 123}, - 'baz': {'value': 'abc'}, - }, - }, - } - klass = self._get_target_class() - param = klass.from_api_repr(RESOURCE) - self.assertEqual(param.name, None) - self.assertEqual(param.struct_types, {'bar': 'INT64', 'baz': 'STRING'}) - self.assertEqual(param.struct_values, {'bar': 123, 'baz': 'abc'}) - - def test_from_api_repr_w_nested_array(self): - from google.cloud.bigquery._helpers import ArrayQueryParameter - - RESOURCE = { - 'name': 'foo', - 'parameterType': { - 'type': 'STRUCT', - 'structTypes': [ - {'name': 'bar', 'type': {'type': 'STRING'}}, - {'name': 'baz', 'type': { - 'type': 'ARRAY', - 'arrayType': {'type': 'INT64'}, - }}, - ], - }, - 'parameterValue': { - 'structValues': { - 'bar': {'value': 'abc'}, - 'baz': {'arrayValues': [ - {'value': '123'}, - {'value': '456'}, - ]}, - }, - }, - } - klass = self._get_target_class() - param = klass.from_api_repr(RESOURCE) - self.assertEqual( - param, - self._make_one( - 'foo', - _make_subparam('bar', 'STRING', 'abc'), - ArrayQueryParameter('baz', 'INT64', [123, 456]))) - - def test_from_api_repr_w_nested_struct(self): - RESOURCE = { - 'name': 'foo', - 'parameterType': { - 'type': 'STRUCT', - 'structTypes': [ - {'name': 'bar', 'type': {'type': 
'STRING'}}, - {'name': 'baz', 'type': { - 'type': 'STRUCT', - 'structTypes': [ - {'name': 'qux', 'type': {'type': 'INT64'}}, - {'name': 'spam', 'type': {'type': 'BOOL'}}, - ], - }}, - ], - }, - 'parameterValue': { - 'structValues': { - 'bar': {'value': 'abc'}, - 'baz': {'structValues': { - 'qux': {'value': '123'}, - 'spam': {'value': 'true'}, - }}, - }, - }, - } - - klass = self._get_target_class() - param = klass.from_api_repr(RESOURCE) - - expected = self._make_one( - 'foo', - _make_subparam('bar', 'STRING', 'abc'), - self._make_one( - 'baz', - _make_subparam('qux', 'INT64', 123), - _make_subparam('spam', 'BOOL', True))) - self.assertEqual(param.name, 'foo') - self.assertEqual(param.struct_types, expected.struct_types) - self.assertEqual(param.struct_values, expected.struct_values) - - def test_to_api_repr_w_name(self): - EXPECTED = { - 'name': 'foo', - 'parameterType': { - 'type': 'STRUCT', - 'structTypes': [ - {'name': 'bar', 'type': {'type': 'INT64'}}, - {'name': 'baz', 'type': {'type': 'STRING'}}, - ], - }, - 'parameterValue': { - 'structValues': { - 'bar': {'value': '123'}, - 'baz': {'value': 'abc'}, - }, - }, - } - sub_1 = _make_subparam('bar', 'INT64', 123) - sub_2 = _make_subparam('baz', 'STRING', 'abc') - param = self._make_one('foo', sub_1, sub_2) - self.assertEqual(param.to_api_repr(), EXPECTED) - - def test_to_api_repr_wo_name(self): - EXPECTED = { - 'parameterType': { - 'type': 'STRUCT', - 'structTypes': [ - {'name': 'bar', 'type': {'type': 'INT64'}}, - {'name': 'baz', 'type': {'type': 'STRING'}}, - ], - }, - 'parameterValue': { - 'structValues': { - 'bar': {'value': '123'}, - 'baz': {'value': 'abc'}, - }, - }, - } - sub_1 = _make_subparam('bar', 'INT64', 123) - sub_2 = _make_subparam('baz', 'STRING', 'abc') - klass = self._get_target_class() - param = klass.positional(sub_1, sub_2) - self.assertEqual(param.to_api_repr(), EXPECTED) - - def test_to_api_repr_w_nested_array(self): - from google.cloud.bigquery._helpers import ArrayQueryParameter - - EXPECTED = { - 'name': 'foo', - 'parameterType': { - 'type': 'STRUCT', - 'structTypes': [ - {'name': 'bar', 'type': {'type': 'STRING'}}, - {'name': 'baz', 'type': { - 'type': 'ARRAY', - 'arrayType': {'type': 'INT64'}, - }}, - ], - }, - 'parameterValue': { - 'structValues': { - 'bar': {'value': 'abc'}, - 'baz': {'arrayValues': [ - {'value': '123'}, - {'value': '456'}, - ]}, - }, - }, - } - scalar = _make_subparam('bar', 'STRING', 'abc') - array = ArrayQueryParameter('baz', 'INT64', [123, 456]) - param = self._make_one('foo', scalar, array) - self.assertEqual(param.to_api_repr(), EXPECTED) - - def test_to_api_repr_w_nested_struct(self): - EXPECTED = { - 'name': 'foo', - 'parameterType': { - 'type': 'STRUCT', - 'structTypes': [ - {'name': 'bar', 'type': {'type': 'STRING'}}, - {'name': 'baz', 'type': { - 'type': 'STRUCT', - 'structTypes': [ - {'name': 'qux', 'type': {'type': 'INT64'}}, - {'name': 'spam', 'type': {'type': 'BOOL'}}, - ], - }}, - ], - }, - 'parameterValue': { - 'structValues': { - 'bar': {'value': 'abc'}, - 'baz': {'structValues': { - 'qux': {'value': '123'}, - 'spam': {'value': 'true'}, - }}, - }, - }, - } - scalar_1 = _make_subparam('bar', 'STRING', 'abc') - scalar_2 = _make_subparam('qux', 'INT64', 123) - scalar_3 = _make_subparam('spam', 'BOOL', True) - sub = self._make_one('baz', scalar_2, scalar_3) - param = self._make_one('foo', scalar_1, sub) - self.assertEqual(param.to_api_repr(), EXPECTED) - - def test___eq___wrong_type(self): - field = self._make_one( - 'test', _make_subparam('bar', 'STRING', 'abc')) - other = 
object() - self.assertNotEqual(field, other) - self.assertEqual(field, mock.ANY) - - def test___eq___name_mismatch(self): - field = self._make_one( - 'test', _make_subparam('bar', 'STRING', 'abc')) - other = self._make_one( - 'other ', _make_subparam('bar', 'STRING', 'abc')) - self.assertNotEqual(field, other) - - def test___eq___field_type_mismatch(self): - field = self._make_one( - 'test', _make_subparam('bar', 'STRING', None)) - other = self._make_one( - 'test', _make_subparam('bar', 'INT64', None)) - self.assertNotEqual(field, other) - - def test___eq___value_mismatch(self): - field = self._make_one( - 'test', _make_subparam('bar', 'STRING', 'hello')) - other = self._make_one( - 'test', _make_subparam('bar', 'STRING', 'world')) - self.assertNotEqual(field, other) - - def test___eq___hit(self): - field = self._make_one( - 'test', _make_subparam('bar', 'STRING', 'gotcha')) - other = self._make_one( - 'test', _make_subparam('bar', 'STRING', 'gotcha')) - self.assertEqual(field, other) - - def test___ne___wrong_type(self): - field = self._make_one( - 'test', _make_subparam('bar', 'STRING', 'hello')) - other = object() - self.assertNotEqual(field, other) - self.assertEqual(field, mock.ANY) - - def test___ne___same_value(self): - field1 = self._make_one( - 'test', _make_subparam('bar', 'STRING', 'hello')) - field2 = self._make_one( - 'test', _make_subparam('bar', 'STRING', 'hello')) - # unittest ``assertEqual`` uses ``==`` not ``!=``. - comparison_val = (field1 != field2) - self.assertFalse(comparison_val) - - def test___ne___different_values(self): - field1 = self._make_one( - 'test', _make_subparam('bar', 'STRING', 'hello')) - field2 = self._make_one( - 'test', _make_subparam('bar', 'STRING', 'world')) - self.assertNotEqual(field1, field2) - - def test___repr__(self): - field1 = self._make_one( - 'test', _make_subparam('field1', 'STRING', 'hello')) - got = repr(field1) - self.assertIn('StructQueryParameter', got) - self.assertIn("'field1', 'STRING'", got) - self.assertIn("'field1': 'hello'", got) - - -class Test_QueryParametersProperty(unittest.TestCase): - - @staticmethod - def _get_target_class(): - from google.cloud.bigquery._helpers import QueryParametersProperty - - return QueryParametersProperty - - def _make_one(self, *args, **kw): - return self._get_target_class()(*args, **kw) - - def _descriptor_and_klass(self): - descriptor = self._make_one() + descriptor = self._make_one( + 'query_parameters', 'queryParameters', _AbstractQueryParameter) class _Test(object): - _query_parameters = () + def __init__(self): + self._properties = {} + query_parameters = descriptor return descriptor, _Test @@ -1779,17 +841,17 @@ def test_instance_getter_empty(self): self.assertEqual(instance.query_parameters, []) def test_instance_getter_w_non_empty_list(self): - from google.cloud.bigquery._helpers import ScalarQueryParameter + from google.cloud.bigquery.query import ScalarQueryParameter query_parameters = [ScalarQueryParameter("foo", 'INT64', 123)] _, klass = self._descriptor_and_klass() instance = klass() - instance._query_parameters = tuple(query_parameters) + instance._properties['queryParameters'] = query_parameters self.assertEqual(instance.query_parameters, query_parameters) def test_instance_setter_w_empty_list(self): - from google.cloud.bigquery._helpers import ScalarQueryParameter + from google.cloud.bigquery.query import ScalarQueryParameter query_parameters = [ScalarQueryParameter("foo", 'INT64', 123)] _, klass = self._descriptor_and_klass() @@ -1800,8 +862,19 @@ def 
test_instance_setter_w_empty_list(self): self.assertEqual(instance.query_parameters, []) + def test_instance_setter_w_none(self): + from google.cloud.bigquery.query import ScalarQueryParameter + + query_parameters = [ScalarQueryParameter("foo", 'INT64', 123)] + _, klass = self._descriptor_and_klass() + instance = klass() + instance._query_parameters = query_parameters + + with self.assertRaises(ValueError): + instance.query_parameters = None + def test_instance_setter_w_valid_udf(self): - from google.cloud.bigquery._helpers import ScalarQueryParameter + from google.cloud.bigquery.query import ScalarQueryParameter query_parameters = [ScalarQueryParameter("foo", 'INT64', 123)] _, klass = self._descriptor_and_klass() diff --git a/bigquery/tests/unit/test_client.py b/bigquery/tests/unit/test_client.py index dc998926d434..c71847e367e0 100644 --- a/bigquery/tests/unit/test_client.py +++ b/bigquery/tests/unit/test_client.py @@ -12,9 +12,18 @@ # See the License for the specific language governing permissions and # limitations under the License. +import copy +import email +import io +import json import unittest import mock +import six +from six.moves import http_client +import pytest + +from google.cloud.bigquery.dataset import DatasetReference def _make_credentials(): @@ -25,6 +34,11 @@ def _make_credentials(): class TestClient(unittest.TestCase): + PROJECT = 'PROJECT' + DS_ID = 'DATASET_ID' + TABLE_ID = 'TABLE_ID' + TABLE_REF = DatasetReference(PROJECT, DS_ID).table(TABLE_ID) + @staticmethod def _get_target_class(): from google.cloud.bigquery.client import Client @@ -37,10 +51,10 @@ def _make_one(self, *args, **kw): def test_ctor(self): from google.cloud.bigquery._http import Connection - PROJECT = 'PROJECT' creds = _make_credentials() http = object() - client = self._make_one(project=PROJECT, credentials=creds, _http=http) + client = self._make_one(project=self.PROJECT, credentials=creds, + _http=http) self.assertIsInstance(client._connection, Connection) self.assertIs(client._connection.credentials, creds) self.assertIs(client._connection.http, http) @@ -48,14 +62,13 @@ def test_ctor(self): def test__get_query_results_miss_w_explicit_project_and_timeout(self): from google.cloud.exceptions import NotFound - project = 'PROJECT' creds = _make_credentials() - client = self._make_one(project, creds) + client = self._make_one(self.PROJECT, creds) conn = client._connection = _Connection() with self.assertRaises(NotFound): client._get_query_results( - 'nothere', project='other-project', timeout_ms=500) + 'nothere', None, project='other-project', timeout_ms=500) self.assertEqual(len(conn._requested), 1) req = conn._requested[0] @@ -66,7 +79,6 @@ def test__get_query_results_miss_w_explicit_project_and_timeout(self): req['query_params'], {'maxResults': 0, 'timeoutMs': 500}) def test__get_query_results_hit(self): - project = 'PROJECT' job_id = 'query_job' data = { 'kind': 'bigquery#getQueryResultsResponse', @@ -86,7 +98,7 @@ def test__get_query_results_hit(self): ] }, 'jobReference': { - 'projectId': project, + 'projectId': self.PROJECT, 'jobId': job_id, }, 'totalRows': '10', @@ -96,15 +108,14 @@ def test__get_query_results_hit(self): } creds = _make_credentials() - client = self._make_one(project, creds) + client = self._make_one(self.PROJECT, creds) client._connection = _Connection(data) - query_results = client._get_query_results(job_id) + query_results = client._get_query_results(job_id, None) self.assertEqual(query_results.total_rows, 10) self.assertTrue(query_results.complete) def 
test_list_projects_defaults(self): - import six from google.cloud.bigquery.client import Project PROJECT_1 = 'PROJECT_ONE' @@ -149,14 +160,11 @@ def test_list_projects_defaults(self): self.assertEqual(req['path'], '/%s' % PATH) def test_list_projects_explicit_response_missing_projects_key(self): - import six - - PROJECT = 'PROJECT' PATH = 'projects' TOKEN = 'TOKEN' DATA = {} creds = _make_credentials() - client = self._make_one(PROJECT, creds) + client = self._make_one(self.PROJECT, creds) conn = client._connection = _Connection(DATA) iterator = client.list_projects(max_results=3, page_token=TOKEN) @@ -175,31 +183,29 @@ def test_list_projects_explicit_response_missing_projects_key(self): {'maxResults': 3, 'pageToken': TOKEN}) def test_list_datasets_defaults(self): - import six from google.cloud.bigquery.dataset import Dataset - PROJECT = 'PROJECT' DATASET_1 = 'dataset_one' DATASET_2 = 'dataset_two' - PATH = 'projects/%s/datasets' % PROJECT + PATH = 'projects/%s/datasets' % self.PROJECT TOKEN = 'TOKEN' DATA = { 'nextPageToken': TOKEN, 'datasets': [ {'kind': 'bigquery#dataset', - 'id': '%s:%s' % (PROJECT, DATASET_1), + 'id': '%s:%s' % (self.PROJECT, DATASET_1), 'datasetReference': {'datasetId': DATASET_1, - 'projectId': PROJECT}, + 'projectId': self.PROJECT}, 'friendlyName': None}, {'kind': 'bigquery#dataset', - 'id': '%s:%s' % (PROJECT, DATASET_2), + 'id': '%s:%s' % (self.PROJECT, DATASET_2), 'datasetReference': {'datasetId': DATASET_2, - 'projectId': PROJECT}, + 'projectId': self.PROJECT}, 'friendlyName': 'Two'}, ] } creds = _make_credentials() - client = self._make_one(PROJECT, creds) + client = self._make_one(self.PROJECT, creds) conn = client._connection = _Connection(DATA) iterator = client.list_datasets() @@ -210,7 +216,7 @@ def test_list_datasets_defaults(self): self.assertEqual(len(datasets), len(DATA['datasets'])) for found, expected in zip(datasets, DATA['datasets']): self.assertIsInstance(found, Dataset) - self.assertEqual(found.dataset_id, expected['id']) + self.assertEqual(found.full_dataset_id, expected['id']) self.assertEqual(found.friendly_name, expected['friendlyName']) self.assertEqual(token, TOKEN) @@ -220,18 +226,17 @@ def test_list_datasets_defaults(self): self.assertEqual(req['path'], '/%s' % PATH) def test_list_datasets_explicit_response_missing_datasets_key(self): - import six - - PROJECT = 'PROJECT' - PATH = 'projects/%s/datasets' % PROJECT + PATH = 'projects/%s/datasets' % self.PROJECT TOKEN = 'TOKEN' + FILTER = 'FILTER' DATA = {} creds = _make_credentials() - client = self._make_one(PROJECT, creds) + client = self._make_one(self.PROJECT, creds) conn = client._connection = _Connection(DATA) iterator = client.list_datasets( - include_all=True, max_results=3, page_token=TOKEN) + include_all=True, filter=FILTER, + max_results=3, page_token=TOKEN) page = six.next(iterator.pages) datasets = list(page) token = iterator.next_page_token @@ -244,55 +249,995 @@ def test_list_datasets_explicit_response_missing_datasets_key(self): self.assertEqual(req['method'], 'GET') self.assertEqual(req['path'], '/%s' % PATH) self.assertEqual(req['query_params'], - {'all': True, 'maxResults': 3, 'pageToken': TOKEN}) + {'all': True, 'filter': FILTER, + 'maxResults': 3, 'pageToken': TOKEN}) + + def test_dataset_with_specified_project(self): + from google.cloud.bigquery.dataset import DatasetReference + + creds = _make_credentials() + http = object() + client = self._make_one(project=self.PROJECT, credentials=creds, + _http=http) + dataset = client.dataset(self.DS_ID, self.PROJECT) + 
self.assertIsInstance(dataset, DatasetReference) + self.assertEqual(dataset.dataset_id, self.DS_ID) + self.assertEqual(dataset.project, self.PROJECT) + + def test_dataset_with_default_project(self): + from google.cloud.bigquery.dataset import DatasetReference + + creds = _make_credentials() + http = object() + client = self._make_one(project=self.PROJECT, credentials=creds, + _http=http) + dataset = client.dataset(self.DS_ID) + self.assertIsInstance(dataset, DatasetReference) + self.assertEqual(dataset.dataset_id, self.DS_ID) + self.assertEqual(dataset.project, self.PROJECT) + + def test_get_dataset(self): + from google.cloud.exceptions import ServerError + + path = 'projects/%s/datasets/%s' % (self.PROJECT, self.DS_ID) + creds = _make_credentials() + http = object() + client = self._make_one(project=self.PROJECT, credentials=creds, + _http=http) + resource = { + 'id': '%s:%s' % (self.PROJECT, self.DS_ID), + 'datasetReference': { + 'projectId': self.PROJECT, + 'datasetId': self.DS_ID, + }, + } + conn = client._connection = _Connection(resource) + dataset_ref = client.dataset(self.DS_ID) + + dataset = client.get_dataset(dataset_ref) - def test_dataset(self): + self.assertEqual(len(conn._requested), 1) + req = conn._requested[0] + self.assertEqual(req['method'], 'GET') + self.assertEqual(req['path'], '/%s' % path) + self.assertEqual(dataset.dataset_id, self.DS_ID) + + # Test retry. + + # Not a cloud API exception (missing 'errors' field). + client._connection = _Connection(Exception(''), resource) + with self.assertRaises(Exception): + client.get_dataset(dataset_ref) + + # Zero-length errors field. + client._connection = _Connection(ServerError(''), resource) + with self.assertRaises(ServerError): + client.get_dataset(dataset_ref) + + # Non-retryable reason. + client._connection = _Connection( + ServerError('', errors=[{'reason': 'serious'}]), + resource) + with self.assertRaises(ServerError): + client.get_dataset(dataset_ref) + + # Retryable reason, but retry is disabled. + client._connection = _Connection( + ServerError('', errors=[{'reason': 'backendError'}]), + resource) + with self.assertRaises(ServerError): + client.get_dataset(dataset_ref, retry=None) + + # Retryable reason, default retry: success. 
+ client._connection = _Connection( + ServerError('', errors=[{'reason': 'backendError'}]), + resource) + dataset = client.get_dataset(dataset_ref) + self.assertEqual(dataset.dataset_id, self.DS_ID) + + def test_create_dataset_minimal(self): from google.cloud.bigquery.dataset import Dataset - PROJECT = 'PROJECT' - DATASET = 'dataset_name' + PATH = 'projects/%s/datasets' % self.PROJECT + RESOURCE = { + 'datasetReference': + {'projectId': self.PROJECT, 'datasetId': self.DS_ID}, + 'etag': "etag", + 'id': "%s:%s" % (self.PROJECT, self.DS_ID), + } + creds = _make_credentials() + client = self._make_one(project=self.PROJECT, credentials=creds) + conn = client._connection = _Connection(RESOURCE) + ds = client.create_dataset(Dataset(client.dataset(self.DS_ID))) + self.assertEqual(len(conn._requested), 1) + req = conn._requested[0] + self.assertEqual(req['method'], 'POST') + self.assertEqual(req['path'], '/%s' % PATH) + SENT = { + 'datasetReference': + {'projectId': self.PROJECT, 'datasetId': self.DS_ID}, + 'labels': {}, + } + self.assertEqual(req['data'], SENT) + self.assertEqual(ds.dataset_id, self.DS_ID) + self.assertEqual(ds.project, self.PROJECT) + self.assertEqual(ds.etag, RESOURCE['etag']) + self.assertEqual(ds.full_dataset_id, RESOURCE['id']) + + def test_create_dataset_w_attrs(self): + from google.cloud.bigquery.dataset import Dataset, AccessEntry + + PATH = 'projects/%s/datasets' % self.PROJECT + DESCRIPTION = 'DESC' + FRIENDLY_NAME = 'FN' + LOCATION = 'US' + USER_EMAIL = 'phred@example.com' + LABELS = {'color': 'red'} + VIEW = { + 'projectId': 'my-proj', + 'datasetId': 'starry-skies', + 'tableId': 'northern-hemisphere', + } + RESOURCE = { + 'datasetReference': + {'projectId': self.PROJECT, 'datasetId': self.DS_ID}, + 'etag': "etag", + 'id': "%s:%s" % (self.PROJECT, self.DS_ID), + 'description': DESCRIPTION, + 'friendlyName': FRIENDLY_NAME, + 'location': LOCATION, + 'defaultTableExpirationMs': 3600, + 'labels': LABELS, + 'access': [ + {'role': 'OWNER', 'userByEmail': USER_EMAIL}, + {'view': VIEW}], + } + creds = _make_credentials() + client = self._make_one(project=self.PROJECT, credentials=creds) + conn = client._connection = _Connection(RESOURCE) + entries = [AccessEntry('OWNER', 'userByEmail', USER_EMAIL), + AccessEntry(None, 'view', VIEW)] + ds_arg = Dataset(client.dataset(self.DS_ID)) + ds_arg.access_entries = entries + ds_arg.description = DESCRIPTION + ds_arg.friendly_name = FRIENDLY_NAME + ds_arg.default_table_expiration_ms = 3600 + ds_arg.location = LOCATION + ds_arg.labels = LABELS + ds = client.create_dataset(ds_arg) + self.assertEqual(len(conn._requested), 1) + req = conn._requested[0] + self.assertEqual(req['method'], 'POST') + self.assertEqual(req['path'], '/%s' % PATH) + SENT = { + 'datasetReference': + {'projectId': self.PROJECT, 'datasetId': self.DS_ID}, + 'description': DESCRIPTION, + 'friendlyName': FRIENDLY_NAME, + 'location': LOCATION, + 'defaultTableExpirationMs': 3600, + 'access': [ + {'role': 'OWNER', 'userByEmail': USER_EMAIL}, + {'view': VIEW}], + 'labels': LABELS, + } + self.assertEqual(req['data'], SENT) + self.assertEqual(ds.dataset_id, self.DS_ID) + self.assertEqual(ds.project, self.PROJECT) + self.assertEqual(ds.etag, RESOURCE['etag']) + self.assertEqual(ds.full_dataset_id, RESOURCE['id']) + self.assertEqual(ds.description, DESCRIPTION) + self.assertEqual(ds.friendly_name, FRIENDLY_NAME) + self.assertEqual(ds.location, LOCATION) + self.assertEqual(ds.default_table_expiration_ms, 3600) + self.assertEqual(ds.labels, LABELS) + + def 
test_create_table_w_day_partition(self): + from google.cloud.bigquery.table import Table + + path = 'projects/%s/datasets/%s/tables' % ( + self.PROJECT, self.DS_ID) + creds = _make_credentials() + client = self._make_one(project=self.PROJECT, credentials=creds) + resource = { + 'id': '%s:%s:%s' % (self.PROJECT, self.DS_ID, self.TABLE_ID), + 'tableReference': { + 'projectId': self.PROJECT, + 'datasetId': self.DS_ID, + 'tableId': self.TABLE_ID + }, + } + conn = client._connection = _Connection(resource) + table = Table(self.TABLE_REF) + table.partitioning_type = 'DAY' + + got = client.create_table(table) + + self.assertEqual(len(conn._requested), 1) + req = conn._requested[0] + self.assertEqual(req['method'], 'POST') + self.assertEqual(req['path'], '/%s' % path) + sent = { + 'tableReference': { + 'projectId': self.PROJECT, + 'datasetId': self.DS_ID, + 'tableId': self.TABLE_ID + }, + 'timePartitioning': {'type': 'DAY'}, + 'labels': {}, + } + self.assertEqual(req['data'], sent) + self.assertEqual(table.partitioning_type, "DAY") + self.assertEqual(got.table_id, self.TABLE_ID) + + def test_create_table_w_day_partition_and_expire(self): + from google.cloud.bigquery.table import Table + + path = 'projects/%s/datasets/%s/tables' % ( + self.PROJECT, self.DS_ID) + creds = _make_credentials() + client = self._make_one(project=self.PROJECT, credentials=creds) + resource = { + 'id': '%s:%s:%s' % (self.PROJECT, self.DS_ID, self.TABLE_ID), + 'tableReference': { + 'projectId': self.PROJECT, + 'datasetId': self.DS_ID, + 'tableId': self.TABLE_ID + }, + } + conn = client._connection = _Connection(resource) + table = Table(self.TABLE_REF) + table.partitioning_type = 'DAY' + table.partition_expiration = 100 + + got = client.create_table(table) + + self.assertEqual(len(conn._requested), 1) + req = conn._requested[0] + self.assertEqual(req['method'], 'POST') + self.assertEqual(req['path'], '/%s' % path) + sent = { + 'tableReference': { + 'projectId': self.PROJECT, + 'datasetId': self.DS_ID, + 'tableId': self.TABLE_ID + }, + 'timePartitioning': {'type': 'DAY', 'expirationMs': 100}, + 'labels': {}, + } + self.assertEqual(req['data'], sent) + self.assertEqual(table.partitioning_type, "DAY") + self.assertEqual(table.partition_expiration, 100) + self.assertEqual(got.table_id, self.TABLE_ID) + + def test_create_table_w_schema_and_query(self): + from google.cloud.bigquery.table import Table, SchemaField + + path = 'projects/%s/datasets/%s/tables' % ( + self.PROJECT, self.DS_ID) + query = 'SELECT * from %s:%s' % (self.DS_ID, self.TABLE_ID) + creds = _make_credentials() + client = self._make_one(project=self.PROJECT, credentials=creds) + resource = { + 'id': '%s:%s:%s' % (self.PROJECT, self.DS_ID, self.TABLE_ID), + 'tableReference': { + 'projectId': self.PROJECT, + 'datasetId': self.DS_ID, + 'tableId': self.TABLE_ID + }, + 'schema': {'fields': [ + {'name': 'full_name', 'type': 'STRING', 'mode': 'REQUIRED'}, + {'name': 'age', 'type': 'INTEGER', 'mode': 'REQUIRED'}] + }, + 'view': {'query': query}, + } + schema = [ + SchemaField('full_name', 'STRING', mode='REQUIRED'), + SchemaField('age', 'INTEGER', mode='REQUIRED') + ] + conn = client._connection = _Connection(resource) + table = Table(self.TABLE_REF, schema=schema) + table.view_query = query + + got = client.create_table(table) + + self.assertEqual(len(conn._requested), 1) + req = conn._requested[0] + self.assertEqual(req['method'], 'POST') + self.assertEqual(req['path'], '/%s' % path) + sent = { + 'tableReference': { + 'projectId': self.PROJECT, + 'datasetId': 
self.DS_ID, + 'tableId': self.TABLE_ID, + }, + 'schema': {'fields': [ + {'name': 'full_name', 'type': 'STRING', 'mode': 'REQUIRED'}, + {'name': 'age', 'type': 'INTEGER', 'mode': 'REQUIRED'}] + }, + 'view': {'query': query, 'useLegacySql': False}, + 'labels': {}, + } + self.assertEqual(req['data'], sent) + self.assertEqual(got.table_id, self.TABLE_ID) + self.assertEqual(got.project, self.PROJECT) + self.assertEqual(got.dataset_id, self.DS_ID) + self.assertEqual(got.schema, schema) + self.assertEqual(got.view_query, query) + + def test_create_table_w_external(self): + from google.cloud.bigquery.table import Table + from google.cloud.bigquery.external_config import ExternalConfig + + path = 'projects/%s/datasets/%s/tables' % ( + self.PROJECT, self.DS_ID) + creds = _make_credentials() + client = self._make_one(project=self.PROJECT, credentials=creds) + resource = { + 'id': '%s:%s:%s' % (self.PROJECT, self.DS_ID, self.TABLE_ID), + 'tableReference': { + 'projectId': self.PROJECT, + 'datasetId': self.DS_ID, + 'tableId': self.TABLE_ID + }, + 'externalDataConfiguration': { + 'sourceFormat': 'CSV', + 'autodetect': True, + }, + } + conn = client._connection = _Connection(resource) + table = Table(self.TABLE_REF) + ec = ExternalConfig('CSV') + ec.autodetect = True + table.external_data_configuration = ec + + got = client.create_table(table) + + self.assertEqual(len(conn._requested), 1) + req = conn._requested[0] + self.assertEqual(req['method'], 'POST') + self.assertEqual(req['path'], '/%s' % path) + sent = { + 'tableReference': { + 'projectId': self.PROJECT, + 'datasetId': self.DS_ID, + 'tableId': self.TABLE_ID, + }, + 'externalDataConfiguration': { + 'sourceFormat': 'CSV', + 'autodetect': True, + }, + 'labels': {}, + } + self.assertEqual(req['data'], sent) + self.assertEqual(got.table_id, self.TABLE_ID) + self.assertEqual(got.project, self.PROJECT) + self.assertEqual(got.dataset_id, self.DS_ID) + self.assertEqual(got.external_data_configuration.source_format, 'CSV') + self.assertEqual(got.external_data_configuration.autodetect, True) + + def test_get_table(self): + path = 'projects/%s/datasets/%s/tables/%s' % ( + self.PROJECT, self.DS_ID, self.TABLE_ID) creds = _make_credentials() http = object() - client = self._make_one(project=PROJECT, credentials=creds, _http=http) - dataset = client.dataset(DATASET) - self.assertIsInstance(dataset, Dataset) - self.assertEqual(dataset.name, DATASET) - self.assertIs(dataset._client, client) + client = self._make_one(project=self.PROJECT, credentials=creds, + _http=http) + resource = { + 'id': '%s:%s:%s' % (self.PROJECT, self.DS_ID, self.TABLE_ID), + 'tableReference': { + 'projectId': self.PROJECT, + 'datasetId': self.DS_ID, + 'tableId': self.TABLE_ID, + }, + } + conn = client._connection = _Connection(resource) + table = client.get_table(self.TABLE_REF) + + self.assertEqual(len(conn._requested), 1) + req = conn._requested[0] + self.assertEqual(req['method'], 'GET') + self.assertEqual(req['path'], '/%s' % path) + self.assertEqual(table.table_id, self.TABLE_ID) + + def test_update_dataset_w_invalid_field(self): + from google.cloud.bigquery.dataset import Dataset + + creds = _make_credentials() + client = self._make_one(project=self.PROJECT, credentials=creds) + with self.assertRaises(ValueError): + client.update_dataset(Dataset(client.dataset(self.DS_ID)), ["foo"]) + + def test_update_dataset(self): + from google.cloud.bigquery.dataset import Dataset, AccessEntry + + PATH = 'projects/%s/datasets/%s' % (self.PROJECT, self.DS_ID) + DESCRIPTION = 'DESCRIPTION' + 
FRIENDLY_NAME = 'TITLE' + LOCATION = 'loc' + LABELS = {'priority': 'high'} + ACCESS = [ + {'role': 'OWNER', 'userByEmail': 'phred@example.com'}, + ] + EXP = 17 + RESOURCE = { + 'datasetReference': + {'projectId': self.PROJECT, 'datasetId': self.DS_ID}, + 'etag': "etag", + 'description': DESCRIPTION, + 'friendlyName': FRIENDLY_NAME, + 'location': LOCATION, + 'defaultTableExpirationMs': EXP, + 'labels': LABELS, + 'access': ACCESS, + } + creds = _make_credentials() + client = self._make_one(project=self.PROJECT, credentials=creds) + conn = client._connection = _Connection(RESOURCE, RESOURCE) + ds = Dataset(client.dataset(self.DS_ID)) + ds.description = DESCRIPTION + ds.friendly_name = FRIENDLY_NAME + ds.location = LOCATION + ds.default_table_expiration_ms = EXP + ds.labels = LABELS + ds.access_entries = [ + AccessEntry('OWNER', 'userByEmail', 'phred@example.com')] + ds2 = client.update_dataset( + ds, ['description', 'friendly_name', 'location', 'labels', + 'access_entries']) + self.assertEqual(len(conn._requested), 1) + req = conn._requested[0] + self.assertEqual(req['method'], 'PATCH') + SENT = { + 'description': DESCRIPTION, + 'friendlyName': FRIENDLY_NAME, + 'location': LOCATION, + 'labels': LABELS, + 'access': ACCESS, + } + self.assertEqual(req['data'], SENT) + self.assertEqual(req['path'], '/' + PATH) + self.assertIsNone(req['headers']) + self.assertEqual(ds2.description, ds.description) + self.assertEqual(ds2.friendly_name, ds.friendly_name) + self.assertEqual(ds2.location, ds.location) + self.assertEqual(ds2.labels, ds.labels) + self.assertEqual(ds2.access_entries, ds.access_entries) + + # ETag becomes If-Match header. + ds._properties['etag'] = 'etag' + client.update_dataset(ds, []) + req = conn._requested[1] + self.assertEqual(req['headers']['If-Match'], 'etag') + + def test_update_table(self): + from google.cloud.bigquery.table import Table, SchemaField + + path = 'projects/%s/datasets/%s/tables/%s' % ( + self.PROJECT, self.DS_ID, self.TABLE_ID) + description = 'description' + title = 'title' + resource = { + 'id': '%s:%s:%s' % (self.PROJECT, self.DS_ID, self.TABLE_ID), + 'tableReference': { + 'projectId': self.PROJECT, + 'datasetId': self.DS_ID, + 'tableId': self.TABLE_ID + }, + 'schema': {'fields': [ + {'name': 'full_name', 'type': 'STRING', 'mode': 'REQUIRED'}, + {'name': 'age', 'type': 'INTEGER', 'mode': 'REQUIRED'}] + }, + 'etag': 'etag', + 'description': description, + 'friendlyName': title, + 'labels': {'x': 'y'}, + } + schema = [ + SchemaField('full_name', 'STRING', mode='REQUIRED'), + SchemaField('age', 'INTEGER', mode='REQUIRED') + ] + creds = _make_credentials() + client = self._make_one(project=self.PROJECT, credentials=creds) + conn = client._connection = _Connection(resource, resource) + table = Table(self.TABLE_REF, schema=schema) + table.description = description + table.friendly_name = title + table.labels = {'x': 'y'} + + updated_table = client.update_table( + table, ['schema', 'description', 'friendly_name', 'labels']) + + sent = { + 'tableReference': { + 'projectId': self.PROJECT, + 'datasetId': self.DS_ID, + 'tableId': self.TABLE_ID + }, + 'schema': {'fields': [ + {'name': 'full_name', 'type': 'STRING', 'mode': 'REQUIRED'}, + {'name': 'age', 'type': 'INTEGER', 'mode': 'REQUIRED'}]}, + 'description': description, + 'friendlyName': title, + 'labels': {'x': 'y'}, + } + self.assertEqual(len(conn._requested), 1) + req = conn._requested[0] + self.assertEqual(req['method'], 'PATCH') + self.assertEqual(req['data'], sent) + self.assertEqual(req['path'], '/' + path) + 
self.assertIsNone(req['headers']) + self.assertEqual(updated_table.description, table.description) + self.assertEqual(updated_table.friendly_name, table.friendly_name) + self.assertEqual(updated_table.schema, table.schema) + self.assertEqual(updated_table.labels, table.labels) + + # ETag becomes If-Match header. + table._properties['etag'] = 'etag' + client.update_table(table, []) + req = conn._requested[1] + self.assertEqual(req['headers']['If-Match'], 'etag') + + def test_update_table_only_use_legacy_sql(self): + from google.cloud.bigquery.table import Table + + path = 'projects/%s/datasets/%s/tables/%s' % ( + self.PROJECT, self.DS_ID, self.TABLE_ID) + resource = { + 'id': '%s:%s:%s' % (self.PROJECT, self.DS_ID, self.TABLE_ID), + 'tableReference': { + 'projectId': self.PROJECT, + 'datasetId': self.DS_ID, + 'tableId': self.TABLE_ID + }, + 'view': {'useLegacySql': True} + } + creds = _make_credentials() + client = self._make_one(project=self.PROJECT, credentials=creds) + conn = client._connection = _Connection(resource) + table = Table(self.TABLE_REF) + table.view_use_legacy_sql = True + + updated_table = client.update_table(table, ['view_use_legacy_sql']) + + self.assertEqual(len(conn._requested), 1) + req = conn._requested[0] + self.assertEqual(req['method'], 'PATCH') + self.assertEqual(req['path'], '/%s' % path) + sent = { + 'tableReference': { + 'projectId': self.PROJECT, + 'datasetId': self.DS_ID, + 'tableId': self.TABLE_ID + }, + 'view': {'useLegacySql': True} + } + self.assertEqual(req['data'], sent) + self.assertEqual( + updated_table.view_use_legacy_sql, table.view_use_legacy_sql) + + def test_update_table_w_query(self): + import datetime + from google.cloud._helpers import UTC + from google.cloud._helpers import _millis + from google.cloud.bigquery.table import Table, SchemaField + + path = 'projects/%s/datasets/%s/tables/%s' % ( + self.PROJECT, self.DS_ID, self.TABLE_ID) + query = 'select fullname, age from person_ages' + location = 'EU' + exp_time = datetime.datetime(2015, 8, 1, 23, 59, 59, tzinfo=UTC) + schema_resource = {'fields': [ + {'name': 'full_name', 'type': 'STRING', 'mode': 'REQUIRED'}, + {'name': 'age', 'type': 'INTEGER', 'mode': 'REQUIRED'}]} + schema = [ + SchemaField('full_name', 'STRING', mode='REQUIRED'), + SchemaField('age', 'INTEGER', mode='REQUIRED') + ] + resource = { + 'id': '%s:%s:%s' % (self.PROJECT, self.DS_ID, self.TABLE_ID), + 'tableReference': { + 'projectId': self.PROJECT, + 'datasetId': self.DS_ID, + 'tableId': self.TABLE_ID + }, + 'schema': schema_resource, + 'view': {'query': query, 'useLegacySql': True}, + 'location': location, + 'expirationTime': _millis(exp_time) + } + creds = _make_credentials() + client = self._make_one(project=self.PROJECT, credentials=creds) + conn = client._connection = _Connection(resource) + table = Table(self.TABLE_REF, schema=schema) + table.location = location + table.expires = exp_time + table.view_query = query + table.view_use_legacy_sql = True + updated_properties = ['schema', 'view_query', 'location', + 'expires', 'view_use_legacy_sql'] + + updated_table = client.update_table(table, updated_properties) + + self.assertEqual(len(conn._requested), 1) + req = conn._requested[0] + self.assertEqual(req['method'], 'PATCH') + self.assertEqual(req['path'], '/%s' % path) + sent = { + 'tableReference': { + 'projectId': self.PROJECT, + 'datasetId': self.DS_ID, + 'tableId': self.TABLE_ID + }, + 'view': {'query': query, 'useLegacySql': True}, + 'location': location, + 'expirationTime': _millis(exp_time), + 'schema': 
schema_resource, + } + self.assertEqual(req['data'], sent) + self.assertEqual(updated_table.schema, table.schema) + self.assertEqual(updated_table.view_query, table.view_query) + self.assertEqual(updated_table.location, table.location) + self.assertEqual(updated_table.expires, table.expires) + self.assertEqual( + updated_table.view_use_legacy_sql, table.view_use_legacy_sql) + + def test_update_table_w_schema_None(self): + # Simulate deleting schema: not sure if back-end will actually + # allow this operation, but the spec says it is optional. + path = 'projects/%s/datasets/%s/tables/%s' % ( + self.PROJECT, self.DS_ID, self.TABLE_ID) + resource1 = { + 'id': '%s:%s:%s' % (self.PROJECT, self.DS_ID, self.TABLE_ID), + 'tableReference': { + 'projectId': self.PROJECT, + 'datasetId': self.DS_ID, + 'tableId': self.TABLE_ID}, + 'schema': {'fields': [ + {'name': 'full_name', 'type': 'STRING', 'mode': 'REQUIRED'}, + {'name': 'age', 'type': 'INTEGER', 'mode': 'REQUIRED'}]} + } + resource2 = { + 'id': '%s:%s:%s' % (self.PROJECT, self.DS_ID, self.TABLE_ID), + 'tableReference': { + 'projectId': self.PROJECT, + 'datasetId': self.DS_ID, + 'tableId': self.TABLE_ID}, + 'schema': {'fields': []}, + } + creds = _make_credentials() + client = self._make_one(project=self.PROJECT, credentials=creds) + conn = client._connection = _Connection(resource1, resource2) + table = client.get_table(self.TABLE_REF) + table.schema = None + + updated_table = client.update_table(table, ['schema']) + + self.assertEqual(len(conn._requested), 2) + req = conn._requested[1] + self.assertEqual(req['method'], 'PATCH') + sent = { + 'tableReference': { + 'projectId': self.PROJECT, + 'datasetId': self.DS_ID, + 'tableId': self.TABLE_ID + }, + 'schema': None + } + self.assertEqual(req['data'], sent) + self.assertEqual(req['path'], '/%s' % path) + self.assertEqual(updated_table.schema, table.schema) + + def test_update_table_delete_property(self): + from google.cloud.bigquery.table import Table + + description = 'description' + title = 'title' + path = 'projects/%s/datasets/%s/tables/%s' % ( + self.PROJECT, self.DS_ID, self.TABLE_ID) + resource1 = { + 'id': '%s:%s:%s' % (self.PROJECT, self.DS_ID, self.TABLE_ID), + 'tableReference': { + 'projectId': self.PROJECT, + 'datasetId': self.DS_ID, + 'tableId': self.TABLE_ID + }, + 'description': description, + 'friendlyName': title, + } + resource2 = { + 'id': '%s:%s:%s' % (self.PROJECT, self.DS_ID, self.TABLE_ID), + 'tableReference': { + 'projectId': self.PROJECT, + 'datasetId': self.DS_ID, + 'tableId': self.TABLE_ID + }, + 'description': None, + } + creds = _make_credentials() + client = self._make_one(project=self.PROJECT, credentials=creds) + conn = client._connection = _Connection(resource1, resource2) + table = Table(self.TABLE_REF) + table.description = description + table.friendly_name = title + table2 = client.update_table(table, ['description', 'friendly_name']) + self.assertEqual(table2.description, table.description) + table2.description = None + + table3 = client.update_table(table2, ['description']) + self.assertEqual(len(conn._requested), 2) + req = conn._requested[1] + self.assertEqual(req['method'], 'PATCH') + self.assertEqual(req['path'], '/%s' % path) + sent = { + 'tableReference': { + 'projectId': self.PROJECT, + 'datasetId': self.DS_ID, + 'tableId': self.TABLE_ID + }, + 'description': None, + } + self.assertEqual(req['data'], sent) + self.assertIsNone(table3.description) + + def test_list_dataset_tables_empty(self): + creds = _make_credentials() + client = 
self._make_one(project=self.PROJECT, credentials=creds) + conn = client._connection = _Connection({}) + + dataset = client.dataset(self.DS_ID) + iterator = client.list_dataset_tables(dataset) + self.assertIs(iterator.dataset, dataset) + page = six.next(iterator.pages) + tables = list(page) + token = iterator.next_page_token + + self.assertEqual(tables, []) + self.assertIsNone(token) + self.assertEqual(len(conn._requested), 1) + req = conn._requested[0] + self.assertEqual(req['method'], 'GET') + PATH = 'projects/%s/datasets/%s/tables' % (self.PROJECT, self.DS_ID) + self.assertEqual(req['path'], '/%s' % PATH) + + def test_list_dataset_tables_defaults(self): + from google.cloud.bigquery.table import Table + + TABLE_1 = 'table_one' + TABLE_2 = 'table_two' + PATH = 'projects/%s/datasets/%s/tables' % (self.PROJECT, self.DS_ID) + TOKEN = 'TOKEN' + DATA = { + 'nextPageToken': TOKEN, + 'tables': [ + {'kind': 'bigquery#table', + 'id': '%s:%s.%s' % (self.PROJECT, self.DS_ID, TABLE_1), + 'tableReference': {'tableId': TABLE_1, + 'datasetId': self.DS_ID, + 'projectId': self.PROJECT}, + 'type': 'TABLE'}, + {'kind': 'bigquery#table', + 'id': '%s:%s.%s' % (self.PROJECT, self.DS_ID, TABLE_2), + 'tableReference': {'tableId': TABLE_2, + 'datasetId': self.DS_ID, + 'projectId': self.PROJECT}, + 'type': 'TABLE'}, + ] + } + + creds = _make_credentials() + client = self._make_one(project=self.PROJECT, credentials=creds) + conn = client._connection = _Connection(DATA) + dataset = client.dataset(self.DS_ID) + + iterator = client.list_dataset_tables(dataset) + self.assertIs(iterator.dataset, dataset) + page = six.next(iterator.pages) + tables = list(page) + token = iterator.next_page_token + + self.assertEqual(len(tables), len(DATA['tables'])) + for found, expected in zip(tables, DATA['tables']): + self.assertIsInstance(found, Table) + self.assertEqual(found.full_table_id, expected['id']) + self.assertEqual(found.table_type, expected['type']) + self.assertEqual(token, TOKEN) + + self.assertEqual(len(conn._requested), 1) + req = conn._requested[0] + self.assertEqual(req['method'], 'GET') + self.assertEqual(req['path'], '/%s' % PATH) + + def test_list_dataset_tables_explicit(self): + from google.cloud.bigquery.table import Table + + TABLE_1 = 'table_one' + TABLE_2 = 'table_two' + PATH = 'projects/%s/datasets/%s/tables' % (self.PROJECT, self.DS_ID) + TOKEN = 'TOKEN' + DATA = { + 'tables': [ + {'kind': 'bigquery#dataset', + 'id': '%s:%s.%s' % (self.PROJECT, self.DS_ID, TABLE_1), + 'tableReference': {'tableId': TABLE_1, + 'datasetId': self.DS_ID, + 'projectId': self.PROJECT}, + 'type': 'TABLE'}, + {'kind': 'bigquery#dataset', + 'id': '%s:%s.%s' % (self.PROJECT, self.DS_ID, TABLE_2), + 'tableReference': {'tableId': TABLE_2, + 'datasetId': self.DS_ID, + 'projectId': self.PROJECT}, + 'type': 'TABLE'}, + ] + } + + creds = _make_credentials() + client = self._make_one(project=self.PROJECT, credentials=creds) + conn = client._connection = _Connection(DATA) + dataset = client.dataset(self.DS_ID) + + iterator = client.list_dataset_tables( + dataset, max_results=3, page_token=TOKEN) + self.assertIs(iterator.dataset, dataset) + page = six.next(iterator.pages) + tables = list(page) + token = iterator.next_page_token + + self.assertEqual(len(tables), len(DATA['tables'])) + for found, expected in zip(tables, DATA['tables']): + self.assertIsInstance(found, Table) + self.assertEqual(found.full_table_id, expected['id']) + self.assertEqual(found.table_type, expected['type']) + self.assertIsNone(token) + + 
self.assertEqual(len(conn._requested), 1) + req = conn._requested[0] + self.assertEqual(req['method'], 'GET') + self.assertEqual(req['path'], '/%s' % PATH) + self.assertEqual(req['query_params'], + {'maxResults': 3, 'pageToken': TOKEN}) + + def test_list_dataset_tables_wrong_type(self): + creds = _make_credentials() + client = self._make_one(project=self.PROJECT, credentials=creds) + with self.assertRaises(TypeError): + client.list_dataset_tables(client.dataset(self.DS_ID).table("foo")) + + def test_delete_dataset(self): + from google.cloud.bigquery.dataset import Dataset + + PATH = 'projects/%s/datasets/%s' % (self.PROJECT, self.DS_ID) + creds = _make_credentials() + client = self._make_one(project=self.PROJECT, credentials=creds) + conn = client._connection = _Connection({}, {}) + ds_ref = client.dataset(self.DS_ID) + for arg in (ds_ref, Dataset(ds_ref)): + client.delete_dataset(arg) + req = conn._requested[0] + self.assertEqual(req['method'], 'DELETE') + self.assertEqual(req['path'], '/%s' % PATH) + + def test_delete_dataset_wrong_type(self): + creds = _make_credentials() + client = self._make_one(project=self.PROJECT, credentials=creds) + with self.assertRaises(TypeError): + client.delete_dataset(client.dataset(self.DS_ID).table("foo")) + + def test_delete_table(self): + from google.cloud.bigquery.table import Table + + path = 'projects/%s/datasets/%s/tables/%s' % ( + self.PROJECT, self.DS_ID, self.TABLE_ID) + creds = _make_credentials() + http = object() + client = self._make_one(project=self.PROJECT, credentials=creds, + _http=http) + conn = client._connection = _Connection({}, {}) + + for arg in (self.TABLE_REF, Table(self.TABLE_REF)): + client.delete_table(arg) + req = conn._requested[0] + self.assertEqual(req['method'], 'DELETE') + self.assertEqual(req['path'], '/%s' % path) + + def test_delete_table_w_wrong_type(self): + creds = _make_credentials() + client = self._make_one(project=self.PROJECT, credentials=creds) + with self.assertRaises(TypeError): + client.delete_table(client.dataset(self.DS_ID)) def test_job_from_resource_unknown_type(self): - PROJECT = 'PROJECT' creds = _make_credentials() - client = self._make_one(PROJECT, creds) + client = self._make_one(self.PROJECT, creds) with self.assertRaises(ValueError): client.job_from_resource({'configuration': {'nonesuch': {}}}) + def test_get_job_miss_w_explict_project(self): + from google.cloud.exceptions import NotFound + + OTHER_PROJECT = 'OTHER_PROJECT' + JOB_ID = 'NONESUCH' + creds = _make_credentials() + client = self._make_one(self.PROJECT, creds) + conn = client._connection = _Connection() + + with self.assertRaises(NotFound): + client.get_job(JOB_ID, project=OTHER_PROJECT) + + self.assertEqual(len(conn._requested), 1) + req = conn._requested[0] + self.assertEqual(req['method'], 'GET') + self.assertEqual(req['path'], '/projects/OTHER_PROJECT/jobs/NONESUCH') + self.assertEqual(req['query_params'], {'projection': 'full'}) + + def test_get_job_hit(self): + from google.cloud.bigquery.job import QueryJob + + JOB_ID = 'query_job' + QUERY_DESTINATION_TABLE = 'query_destination_table' + QUERY = 'SELECT * from test_dataset:test_table' + ASYNC_QUERY_DATA = { + 'id': '{}:{}'.format(self.PROJECT, JOB_ID), + 'jobReference': { + 'projectId': self.PROJECT, + 'jobId': 'query_job', + }, + 'state': 'DONE', + 'configuration': { + 'query': { + 'query': QUERY, + 'destinationTable': { + 'projectId': self.PROJECT, + 'datasetId': self.DS_ID, + 'tableId': QUERY_DESTINATION_TABLE, + }, + 'createDisposition': 'CREATE_IF_NEEDED', + 
'writeDisposition': 'WRITE_TRUNCATE', + } + }, + } + creds = _make_credentials() + client = self._make_one(self.PROJECT, creds) + conn = client._connection = _Connection(ASYNC_QUERY_DATA) + + job = client.get_job(JOB_ID) + + self.assertIsInstance(job, QueryJob) + self.assertEqual(job.job_id, JOB_ID) + self.assertEqual(job.create_disposition, 'CREATE_IF_NEEDED') + self.assertEqual(job.write_disposition, 'WRITE_TRUNCATE') + + self.assertEqual(len(conn._requested), 1) + req = conn._requested[0] + self.assertEqual(req['method'], 'GET') + self.assertEqual(req['path'], '/projects/PROJECT/jobs/query_job') + self.assertEqual(req['query_params'], {'projection': 'full'}) + def test_list_jobs_defaults(self): - import six - from google.cloud.bigquery.job import LoadTableFromStorageJob + from google.cloud.bigquery.job import LoadJob from google.cloud.bigquery.job import CopyJob - from google.cloud.bigquery.job import ExtractTableToStorageJob + from google.cloud.bigquery.job import ExtractJob from google.cloud.bigquery.job import QueryJob - PROJECT = 'PROJECT' - DATASET = 'test_dataset' SOURCE_TABLE = 'source_table' DESTINATION_TABLE = 'destination_table' QUERY_DESTINATION_TABLE = 'query_destination_table' SOURCE_URI = 'gs://test_bucket/src_object*' DESTINATION_URI = 'gs://test_bucket/dst_object*' JOB_TYPES = { - 'load_job': LoadTableFromStorageJob, + 'load_job': LoadJob, 'copy_job': CopyJob, - 'extract_job': ExtractTableToStorageJob, + 'extract_job': ExtractJob, 'query_job': QueryJob, } - PATH = 'projects/%s/jobs' % PROJECT + PATH = 'projects/%s/jobs' % self.PROJECT TOKEN = 'TOKEN' QUERY = 'SELECT * from test_dataset:test_table' ASYNC_QUERY_DATA = { - 'id': '%s:%s' % (PROJECT, 'query_job'), + 'id': '%s:%s' % (self.PROJECT, 'query_job'), 'jobReference': { - 'projectId': PROJECT, + 'projectId': self.PROJECT, 'jobId': 'query_job', }, 'state': 'DONE', @@ -300,8 +1245,8 @@ def test_list_jobs_defaults(self): 'query': { 'query': QUERY, 'destinationTable': { - 'projectId': PROJECT, - 'datasetId': DATASET, + 'projectId': self.PROJECT, + 'datasetId': self.DS_ID, 'tableId': QUERY_DESTINATION_TABLE, }, 'createDisposition': 'CREATE_IF_NEEDED', @@ -310,17 +1255,17 @@ def test_list_jobs_defaults(self): }, } EXTRACT_DATA = { - 'id': '%s:%s' % (PROJECT, 'extract_job'), + 'id': '%s:%s' % (self.PROJECT, 'extract_job'), 'jobReference': { - 'projectId': PROJECT, + 'projectId': self.PROJECT, 'jobId': 'extract_job', }, 'state': 'DONE', 'configuration': { 'extract': { 'sourceTable': { - 'projectId': PROJECT, - 'datasetId': DATASET, + 'projectId': self.PROJECT, + 'datasetId': self.DS_ID, 'tableId': SOURCE_TABLE, }, 'destinationUris': [DESTINATION_URI], @@ -328,39 +1273,39 @@ def test_list_jobs_defaults(self): }, } COPY_DATA = { - 'id': '%s:%s' % (PROJECT, 'copy_job'), + 'id': '%s:%s' % (self.PROJECT, 'copy_job'), 'jobReference': { - 'projectId': PROJECT, + 'projectId': self.PROJECT, 'jobId': 'copy_job', }, 'state': 'DONE', 'configuration': { 'copy': { 'sourceTables': [{ - 'projectId': PROJECT, - 'datasetId': DATASET, + 'projectId': self.PROJECT, + 'datasetId': self.DS_ID, 'tableId': SOURCE_TABLE, }], 'destinationTable': { - 'projectId': PROJECT, - 'datasetId': DATASET, + 'projectId': self.PROJECT, + 'datasetId': self.DS_ID, 'tableId': DESTINATION_TABLE, }, } }, } LOAD_DATA = { - 'id': '%s:%s' % (PROJECT, 'load_job'), + 'id': '%s:%s' % (self.PROJECT, 'load_job'), 'jobReference': { - 'projectId': PROJECT, + 'projectId': self.PROJECT, 'jobId': 'load_job', }, 'state': 'DONE', 'configuration': { 'load': { 'destinationTable': { - 
'projectId': PROJECT, - 'datasetId': DATASET, + 'projectId': self.PROJECT, + 'datasetId': self.DS_ID, 'tableId': SOURCE_TABLE, }, 'sourceUris': [SOURCE_URI], @@ -377,7 +1322,7 @@ def test_list_jobs_defaults(self): ] } creds = _make_credentials() - client = self._make_one(PROJECT, creds) + client = self._make_one(self.PROJECT, creds) conn = client._connection = _Connection(DATA) iterator = client.list_jobs() @@ -389,7 +1334,7 @@ def test_list_jobs_defaults(self): for found, expected in zip(jobs, DATA['jobs']): name = expected['jobReference']['jobId'] self.assertIsInstance(found, JOB_TYPES[name]) - self.assertEqual(found.name, name) + self.assertEqual(found.job_id, name) self.assertEqual(token, TOKEN) self.assertEqual(len(conn._requested), 1) @@ -399,29 +1344,26 @@ def test_list_jobs_defaults(self): self.assertEqual(req['query_params'], {'projection': 'full'}) def test_list_jobs_load_job_wo_sourceUris(self): - import six - from google.cloud.bigquery.job import LoadTableFromStorageJob + from google.cloud.bigquery.job import LoadJob - PROJECT = 'PROJECT' - DATASET = 'test_dataset' SOURCE_TABLE = 'source_table' JOB_TYPES = { - 'load_job': LoadTableFromStorageJob, + 'load_job': LoadJob, } - PATH = 'projects/%s/jobs' % PROJECT + PATH = 'projects/%s/jobs' % self.PROJECT TOKEN = 'TOKEN' LOAD_DATA = { - 'id': '%s:%s' % (PROJECT, 'load_job'), + 'id': '%s:%s' % (self.PROJECT, 'load_job'), 'jobReference': { - 'projectId': PROJECT, + 'projectId': self.PROJECT, 'jobId': 'load_job', }, 'state': 'DONE', 'configuration': { 'load': { 'destinationTable': { - 'projectId': PROJECT, - 'datasetId': DATASET, + 'projectId': self.PROJECT, + 'datasetId': self.DS_ID, 'tableId': SOURCE_TABLE, }, } @@ -434,7 +1376,7 @@ def test_list_jobs_load_job_wo_sourceUris(self): ] } creds = _make_credentials() - client = self._make_one(PROJECT, creds) + client = self._make_one(self.PROJECT, creds) conn = client._connection = _Connection(DATA) iterator = client.list_jobs() @@ -446,7 +1388,7 @@ def test_list_jobs_load_job_wo_sourceUris(self): for found, expected in zip(jobs, DATA['jobs']): name = expected['jobReference']['jobId'] self.assertIsInstance(found, JOB_TYPES[name]) - self.assertEqual(found.name, name) + self.assertEqual(found.job_id, name) self.assertEqual(token, TOKEN) self.assertEqual(len(conn._requested), 1) @@ -456,14 +1398,11 @@ def test_list_jobs_load_job_wo_sourceUris(self): self.assertEqual(req['query_params'], {'projection': 'full'}) def test_list_jobs_explicit_missing(self): - import six - - PROJECT = 'PROJECT' - PATH = 'projects/%s/jobs' % PROJECT + PATH = 'projects/%s/jobs' % self.PROJECT DATA = {} TOKEN = 'TOKEN' creds = _make_credentials() - client = self._make_one(PROJECT, creds) + client = self._make_one(self.PROJECT, creds) conn = client._connection = _Connection(DATA) iterator = client.list_jobs(max_results=1000, page_token=TOKEN, @@ -486,180 +1425,1800 @@ def test_list_jobs_explicit_missing(self): 'allUsers': True, 'stateFilter': 'done'}) - def test_load_table_from_storage(self): - from google.cloud.bigquery.job import LoadTableFromStorageJob + def test_load_table_from_uri(self): + from google.cloud.bigquery.job import LoadJob - PROJECT = 'PROJECT' JOB = 'job_name' - DATASET = 'dataset_name' DESTINATION = 'destination_table' SOURCE_URI = 'http://example.com/source.csv' + RESOURCE = { + 'jobReference': { + 'projectId': self.PROJECT, + 'jobId': JOB, + }, + 'configuration': { + 'load': { + 'sourceUris': [SOURCE_URI], + 'destinationTable': { + 'projectId': self.PROJECT, + 'datasetId': self.DS_ID, + 'tableId': 
DESTINATION, + }, + }, + }, + } creds = _make_credentials() http = object() - client = self._make_one(project=PROJECT, credentials=creds, _http=http) - dataset = client.dataset(DATASET) - destination = dataset.table(DESTINATION) - job = client.load_table_from_storage(JOB, destination, SOURCE_URI) - self.assertIsInstance(job, LoadTableFromStorageJob) + client = self._make_one(project=self.PROJECT, credentials=creds, + _http=http) + conn = client._connection = _Connection(RESOURCE) + destination = client.dataset(self.DS_ID).table(DESTINATION) + + job = client.load_table_from_uri(SOURCE_URI, destination, job_id=JOB) + + # Check that load_table_from_uri actually starts the job. + self.assertEqual(len(conn._requested), 1) + req = conn._requested[0] + self.assertEqual(req['method'], 'POST') + self.assertEqual(req['path'], '/projects/%s/jobs' % self.PROJECT) + + self.assertIsInstance(job, LoadJob) self.assertIs(job._client, client) - self.assertEqual(job.name, JOB) + self.assertEqual(job.job_id, JOB) self.assertEqual(list(job.source_uris), [SOURCE_URI]) self.assertIs(job.destination, destination) + conn = client._connection = _Connection(RESOURCE) + + job = client.load_table_from_uri([SOURCE_URI], destination, job_id=JOB) + self.assertIsInstance(job, LoadJob) + self.assertIs(job._client, client) + self.assertEqual(job.job_id, JOB) + self.assertEqual(list(job.source_uris), [SOURCE_URI]) + self.assertIs(job.destination, destination) + + @staticmethod + def _mock_requests_response(status_code, headers, content=b''): + return mock.Mock( + content=content, headers=headers, status_code=status_code, + spec=['content', 'headers', 'status_code']) + + def _mock_transport(self, status_code, headers, content=b''): + fake_transport = mock.Mock(spec=['request']) + fake_response = self._mock_requests_response( + status_code, headers, content=content) + fake_transport.request.return_value = fake_response + return fake_transport + + def _initiate_resumable_upload_helper(self, num_retries=None): + from google.resumable_media.requests import ResumableUpload + from google.cloud.bigquery.client import _DEFAULT_CHUNKSIZE + from google.cloud.bigquery.client import _GENERIC_CONTENT_TYPE + from google.cloud.bigquery.client import _get_upload_headers + from google.cloud.bigquery.job import LoadJob, LoadJobConfig + + # Create mocks to be checked for doing transport. + resumable_url = 'http://test.invalid?upload_id=hey-you' + response_headers = {'location': resumable_url} + fake_transport = self._mock_transport( + http_client.OK, response_headers) + client = self._make_one(project=self.PROJECT, _http=fake_transport) + conn = client._connection = _Connection() + + # Create some mock arguments and call the method under test. + data = b'goodbye gudbi gootbee' + stream = io.BytesIO(data) + config = LoadJobConfig() + config.source_format = 'CSV' + job = LoadJob(None, None, self.TABLE_REF, client, job_config=config) + metadata = job._build_resource() + upload, transport = client._initiate_resumable_upload( + stream, metadata, num_retries) + + # Check the returned values. 
+ self.assertIsInstance(upload, ResumableUpload) + upload_url = ( + 'https://www.googleapis.com/upload/bigquery/v2/projects/' + + self.PROJECT + + '/jobs?uploadType=resumable') + self.assertEqual(upload.upload_url, upload_url) + expected_headers = _get_upload_headers(conn.USER_AGENT) + self.assertEqual(upload._headers, expected_headers) + self.assertFalse(upload.finished) + self.assertEqual(upload._chunk_size, _DEFAULT_CHUNKSIZE) + self.assertIs(upload._stream, stream) + self.assertIsNone(upload._total_bytes) + self.assertEqual(upload._content_type, _GENERIC_CONTENT_TYPE) + self.assertEqual(upload.resumable_url, resumable_url) + + retry_strategy = upload._retry_strategy + self.assertEqual(retry_strategy.max_sleep, 64.0) + if num_retries is None: + self.assertEqual(retry_strategy.max_cumulative_retry, 600.0) + self.assertIsNone(retry_strategy.max_retries) + else: + self.assertIsNone(retry_strategy.max_cumulative_retry) + self.assertEqual(retry_strategy.max_retries, num_retries) + self.assertIs(transport, fake_transport) + # Make sure we never read from the stream. + self.assertEqual(stream.tell(), 0) + + # Check the mocks. + request_headers = expected_headers.copy() + request_headers['x-upload-content-type'] = _GENERIC_CONTENT_TYPE + fake_transport.request.assert_called_once_with( + 'POST', + upload_url, + data=json.dumps(metadata).encode('utf-8'), + headers=request_headers, + ) + + def test__initiate_resumable_upload(self): + self._initiate_resumable_upload_helper() + + def test__initiate_resumable_upload_with_retry(self): + self._initiate_resumable_upload_helper(num_retries=11) + + def _do_multipart_upload_success_helper( + self, get_boundary, num_retries=None): + from google.cloud.bigquery.client import _get_upload_headers + from google.cloud.bigquery.job import LoadJob, LoadJobConfig + + fake_transport = self._mock_transport(http_client.OK, {}) + client = self._make_one(project=self.PROJECT, _http=fake_transport) + conn = client._connection = _Connection() + + # Create some mock arguments. + data = b'Bzzzz-zap \x00\x01\xf4' + stream = io.BytesIO(data) + config = LoadJobConfig() + config.source_format = 'CSV' + job = LoadJob(None, None, self.TABLE_REF, client, job_config=config) + metadata = job._build_resource() + size = len(data) + response = client._do_multipart_upload( + stream, metadata, size, num_retries) + + # Check the mocks and the returned value. 
+ self.assertIs(response, fake_transport.request.return_value) + self.assertEqual(stream.tell(), size) + get_boundary.assert_called_once_with() + + upload_url = ( + 'https://www.googleapis.com/upload/bigquery/v2/projects/' + + self.PROJECT + + '/jobs?uploadType=multipart') + payload = ( + b'--==0==\r\n' + + b'content-type: application/json; charset=UTF-8\r\n\r\n' + + json.dumps(metadata).encode('utf-8') + b'\r\n' + + b'--==0==\r\n' + + b'content-type: */*\r\n\r\n' + + data + b'\r\n' + + b'--==0==--') + headers = _get_upload_headers(conn.USER_AGENT) + headers['content-type'] = b'multipart/related; boundary="==0=="' + fake_transport.request.assert_called_once_with( + 'POST', + upload_url, + data=payload, + headers=headers, + ) + + @mock.patch(u'google.resumable_media._upload.get_boundary', + return_value=b'==0==') + def test__do_multipart_upload(self, get_boundary): + self._do_multipart_upload_success_helper(get_boundary) + + @mock.patch(u'google.resumable_media._upload.get_boundary', + return_value=b'==0==') + def test__do_multipart_upload_with_retry(self, get_boundary): + self._do_multipart_upload_success_helper(get_boundary, num_retries=8) + def test_copy_table(self): from google.cloud.bigquery.job import CopyJob - PROJECT = 'PROJECT' JOB = 'job_name' - DATASET = 'dataset_name' SOURCE = 'source_table' DESTINATION = 'destination_table' + RESOURCE = { + 'jobReference': { + 'projectId': self.PROJECT, + 'jobId': JOB, + }, + 'configuration': { + 'copy': { + 'sourceTable': { + 'projectId': self.PROJECT, + 'datasetId': self.DS_ID, + 'tableId': SOURCE, + }, + 'destinationTable': { + 'projectId': self.PROJECT, + 'datasetId': self.DS_ID, + 'tableId': DESTINATION, + }, + }, + }, + } creds = _make_credentials() http = object() - client = self._make_one(project=PROJECT, credentials=creds, _http=http) - dataset = client.dataset(DATASET) + client = self._make_one(project=self.PROJECT, credentials=creds, + _http=http) + conn = client._connection = _Connection(RESOURCE) + dataset = client.dataset(self.DS_ID) source = dataset.table(SOURCE) destination = dataset.table(DESTINATION) - job = client.copy_table(JOB, destination, source) + + job = client.copy_table(source, destination, job_id=JOB) + + # Check that copy_table actually starts the job. 
+ self.assertEqual(len(conn._requested), 1) + req = conn._requested[0] + self.assertEqual(req['method'], 'POST') + self.assertEqual(req['path'], '/projects/%s/jobs' % self.PROJECT) + self.assertIsInstance(job, CopyJob) self.assertIs(job._client, client) - self.assertEqual(job.name, JOB) + self.assertEqual(job.job_id, JOB) self.assertEqual(list(job.sources), [source]) self.assertIs(job.destination, destination) - def test_extract_table_to_storage(self): - from google.cloud.bigquery.job import ExtractTableToStorageJob + conn = client._connection = _Connection(RESOURCE) + source2 = dataset.table(SOURCE + '2') + job = client.copy_table([source, source2], destination, job_id=JOB) + self.assertIsInstance(job, CopyJob) + self.assertIs(job._client, client) + self.assertEqual(job.job_id, JOB) + self.assertEqual(list(job.sources), [source, source2]) + self.assertIs(job.destination, destination) - PROJECT = 'PROJECT' - JOB = 'job_name' - DATASET = 'dataset_name' + def test_extract_table(self): + from google.cloud.bigquery.job import ExtractJob + + JOB = 'job_id' + SOURCE = 'source_table' + DESTINATION = 'gs://bucket_name/object_name' + RESOURCE = { + 'jobReference': { + 'projectId': self.PROJECT, + 'jobId': JOB, + }, + 'configuration': { + 'extract': { + 'sourceTable': { + 'projectId': self.PROJECT, + 'datasetId': self.DS_ID, + 'tableId': SOURCE, + }, + 'destinationUris': [DESTINATION], + }, + }, + } + creds = _make_credentials() + http = object() + client = self._make_one(project=self.PROJECT, credentials=creds, + _http=http) + conn = client._connection = _Connection(RESOURCE) + dataset = client.dataset(self.DS_ID) + source = dataset.table(SOURCE) + + job = client.extract_table(source, DESTINATION, job_id=JOB) + + # Check that extract_table actually starts the job. + self.assertEqual(len(conn._requested), 1) + req = conn._requested[0] + self.assertEqual(req['method'], 'POST') + self.assertEqual(req['path'], '/projects/PROJECT/jobs') + + # Check the job resource. + self.assertIsInstance(job, ExtractJob) + self.assertIs(job._client, client) + self.assertEqual(job.job_id, JOB) + self.assertEqual(job.source, source) + self.assertEqual(list(job.destination_uris), [DESTINATION]) + + def test_extract_table_generated_job_id(self): + from google.cloud.bigquery.job import ExtractJob + from google.cloud.bigquery.job import ExtractJobConfig + from google.cloud.bigquery.job import DestinationFormat + + JOB = 'job_id' SOURCE = 'source_table' DESTINATION = 'gs://bucket_name/object_name' + RESOURCE = { + 'jobReference': { + 'projectId': self.PROJECT, + 'jobId': JOB, + }, + 'configuration': { + 'extract': { + 'sourceTable': { + 'projectId': self.PROJECT, + 'datasetId': self.DS_ID, + 'tableId': SOURCE, + }, + 'destinationUris': [DESTINATION], + 'destinationFormat': 'NEWLINE_DELIMITED_JSON', + }, + }, + } creds = _make_credentials() http = object() - client = self._make_one(project=PROJECT, credentials=creds, _http=http) - dataset = client.dataset(DATASET) + client = self._make_one(project=self.PROJECT, credentials=creds, + _http=http) + conn = client._connection = _Connection(RESOURCE) + dataset = client.dataset(self.DS_ID) source = dataset.table(SOURCE) - job = client.extract_table_to_storage(JOB, source, DESTINATION) - self.assertIsInstance(job, ExtractTableToStorageJob) + job_config = ExtractJobConfig() + job_config.destination_format = ( + DestinationFormat.NEWLINE_DELIMITED_JSON) + + job = client.extract_table(source, DESTINATION, job_config=job_config) + + # Check that extract_table actually starts the job. 
+ self.assertEqual(len(conn._requested), 1) + req = conn._requested[0] + self.assertEqual(req['method'], 'POST') + self.assertEqual(req['path'], '/projects/PROJECT/jobs') + self.assertIsInstance( + req['data']['jobReference']['jobId'], six.string_types) + + # Check the job resource. + self.assertIsInstance(job, ExtractJob) self.assertIs(job._client, client) - self.assertEqual(job.name, JOB) self.assertEqual(job.source, source) self.assertEqual(list(job.destination_uris), [DESTINATION]) - def test_run_async_query_defaults(self): + def test_extract_table_w_destination_uris(self): + from google.cloud.bigquery.job import ExtractJob + + JOB = 'job_id' + SOURCE = 'source_table' + DESTINATION1 = 'gs://bucket_name/object_one' + DESTINATION2 = 'gs://bucket_name/object_two' + RESOURCE = { + 'jobReference': { + 'projectId': self.PROJECT, + 'jobId': JOB, + }, + 'configuration': { + 'extract': { + 'sourceTable': { + 'projectId': self.PROJECT, + 'datasetId': self.DS_ID, + 'tableId': SOURCE, + }, + 'destinationUris': [ + DESTINATION1, + DESTINATION2, + ], + }, + }, + } + creds = _make_credentials() + http = object() + client = self._make_one(project=self.PROJECT, credentials=creds, + _http=http) + conn = client._connection = _Connection(RESOURCE) + dataset = client.dataset(self.DS_ID) + source = dataset.table(SOURCE) + + job = client.extract_table( + source, [DESTINATION1, DESTINATION2], job_id=JOB) + + # Check that extract_table actually starts the job. + self.assertEqual(len(conn._requested), 1) + req = conn._requested[0] + self.assertEqual(req['method'], 'POST') + self.assertEqual(req['path'], '/projects/PROJECT/jobs') + + # Check the job resource. + self.assertIsInstance(job, ExtractJob) + self.assertIs(job._client, client) + self.assertEqual(job.job_id, JOB) + self.assertEqual(job.source, source) + self.assertEqual( + list(job.destination_uris), [DESTINATION1, DESTINATION2]) + + def test_query_defaults(self): from google.cloud.bigquery.job import QueryJob - PROJECT = 'PROJECT' - JOB = 'job_name' QUERY = 'select count(*) from persons' + RESOURCE = { + 'jobReference': { + 'projectId': self.PROJECT, + 'jobId': 'some-random-id', + }, + 'configuration': { + 'query': { + 'query': QUERY, + 'useLegacySql': False, + }, + }, + } creds = _make_credentials() http = object() - client = self._make_one(project=PROJECT, credentials=creds, _http=http) - job = client.run_async_query(JOB, QUERY) + client = self._make_one(project=self.PROJECT, credentials=creds, + _http=http) + conn = client._connection = _Connection(RESOURCE) + + job = client.query(QUERY) + self.assertIsInstance(job, QueryJob) + self.assertIsInstance(job.job_id, six.string_types) self.assertIs(job._client, client) - self.assertEqual(job.name, JOB) self.assertEqual(job.query, QUERY) self.assertEqual(job.udf_resources, []) self.assertEqual(job.query_parameters, []) - def test_run_async_w_udf_resources(self): - from google.cloud.bigquery._helpers import UDFResource + # Check that query actually starts the job. 
+ self.assertEqual(len(conn._requested), 1) + req = conn._requested[0] + self.assertEqual(req['method'], 'POST') + self.assertEqual(req['path'], '/projects/PROJECT/jobs') + sent = req['data'] + self.assertIsInstance( + sent['jobReference']['jobId'], six.string_types) + sent_config = sent['configuration']['query'] + self.assertEqual(sent_config['query'], QUERY) + self.assertFalse(sent_config['useLegacySql']) + + def test_query_w_udf_resources(self): from google.cloud.bigquery.job import QueryJob + from google.cloud.bigquery.job import QueryJobConfig + from google.cloud.bigquery.query import UDFResource RESOURCE_URI = 'gs://some-bucket/js/lib.js' - PROJECT = 'PROJECT' JOB = 'job_name' QUERY = 'select count(*) from persons' + RESOURCE = { + 'jobReference': { + 'projectId': self.PROJECT, + 'jobId': JOB, + }, + 'configuration': { + 'query': { + 'query': QUERY, + 'useLegacySql': True, + 'userDefinedFunctionResources': [ + {'resourceUri': RESOURCE_URI}, + ], + }, + }, + } creds = _make_credentials() http = object() - client = self._make_one(project=PROJECT, credentials=creds, _http=http) + client = self._make_one(project=self.PROJECT, credentials=creds, + _http=http) + conn = client._connection = _Connection(RESOURCE) udf_resources = [UDFResource("resourceUri", RESOURCE_URI)] - job = client.run_async_query(JOB, QUERY, udf_resources=udf_resources) + config = QueryJobConfig() + config.udf_resources = udf_resources + config.use_legacy_sql = True + + job = client.query(QUERY, job_config=config, job_id=JOB) + self.assertIsInstance(job, QueryJob) self.assertIs(job._client, client) - self.assertEqual(job.name, JOB) + self.assertEqual(job.job_id, JOB) self.assertEqual(job.query, QUERY) self.assertEqual(job.udf_resources, udf_resources) self.assertEqual(job.query_parameters, []) - def test_run_async_w_query_parameters(self): - from google.cloud.bigquery._helpers import ScalarQueryParameter + # Check that query actually starts the job. 
+ self.assertEqual(len(conn._requested), 1) + req = conn._requested[0] + self.assertEqual(req['method'], 'POST') + self.assertEqual(req['path'], '/projects/PROJECT/jobs') + sent = req['data'] + self.assertIsInstance( + sent['jobReference']['jobId'], six.string_types) + sent_config = sent['configuration']['query'] + self.assertEqual(sent_config['query'], QUERY) + self.assertTrue(sent_config['useLegacySql']) + self.assertEqual( + sent_config['userDefinedFunctionResources'][0], + {'resourceUri': RESOURCE_URI}) + + def test_query_w_query_parameters(self): from google.cloud.bigquery.job import QueryJob + from google.cloud.bigquery.job import QueryJobConfig + from google.cloud.bigquery.query import ScalarQueryParameter - PROJECT = 'PROJECT' JOB = 'job_name' QUERY = 'select count(*) from persons' + RESOURCE = { + 'jobReference': { + 'projectId': self.PROJECT, + 'jobId': JOB, + }, + 'configuration': { + 'query': { + 'query': QUERY, + 'useLegacySql': False, + 'queryParameters': [ + { + 'name': 'foo', + 'parameterType': {'type': 'INT64'}, + 'parameterValue': {'value': '123'} + }, + ], + }, + }, + } creds = _make_credentials() http = object() - client = self._make_one(project=PROJECT, credentials=creds, _http=http) + client = self._make_one(project=self.PROJECT, credentials=creds, + _http=http) + conn = client._connection = _Connection(RESOURCE) query_parameters = [ScalarQueryParameter('foo', 'INT64', 123)] - job = client.run_async_query(JOB, QUERY, - query_parameters=query_parameters) + config = QueryJobConfig() + config.query_parameters = query_parameters + + job = client.query(QUERY, job_config=config, job_id=JOB) + self.assertIsInstance(job, QueryJob) self.assertIs(job._client, client) - self.assertEqual(job.name, JOB) + self.assertEqual(job.job_id, JOB) self.assertEqual(job.query, QUERY) self.assertEqual(job.udf_resources, []) self.assertEqual(job.query_parameters, query_parameters) - def test_run_sync_query_defaults(self): - from google.cloud.bigquery.query import QueryResults + # Check that query actually starts the job. 
+ self.assertEqual(len(conn._requested), 1) + req = conn._requested[0] + self.assertEqual(req['method'], 'POST') + self.assertEqual(req['path'], '/projects/PROJECT/jobs') + sent = req['data'] + self.assertEqual(sent['jobReference']['jobId'], JOB) + sent_config = sent['configuration']['query'] + self.assertEqual(sent_config['query'], QUERY) + self.assertFalse(sent_config['useLegacySql']) + self.assertEqual( + sent_config['queryParameters'][0], + { + 'name': 'foo', + 'parameterType': {'type': 'INT64'}, + 'parameterValue': {'value': '123'} + }) + + def test_create_rows_wo_schema(self): + from google.cloud.bigquery.table import Table, _TABLE_HAS_NO_SCHEMA - PROJECT = 'PROJECT' - QUERY = 'select count(*) from persons' creds = _make_credentials() http = object() - client = self._make_one(project=PROJECT, credentials=creds, _http=http) - query = client.run_sync_query(QUERY) - self.assertIsInstance(query, QueryResults) - self.assertIs(query._client, client) - self.assertIsNone(query.name) - self.assertEqual(query.query, QUERY) - self.assertEqual(query.udf_resources, []) - self.assertEqual(query.query_parameters, []) - - def test_run_sync_query_w_udf_resources(self): - from google.cloud.bigquery._helpers import UDFResource - from google.cloud.bigquery.query import QueryResults + client = self._make_one(project=self.PROJECT, credentials=creds, + _http=http) + table = Table(self.TABLE_REF) + ROWS = [ + ('Phred Phlyntstone', 32), + ('Bharney Rhubble', 33), + ('Wylma Phlyntstone', 29), + ('Bhettye Rhubble', 27), + ] + + with self.assertRaises(ValueError) as exc: + client.create_rows(table, ROWS) + + self.assertEqual(exc.exception.args, (_TABLE_HAS_NO_SCHEMA,)) + + def test_create_rows_w_schema(self): + import datetime + from google.cloud._helpers import UTC + from google.cloud._helpers import _datetime_to_rfc3339 + from google.cloud._helpers import _microseconds_from_datetime + from google.cloud.bigquery.table import Table, SchemaField + + WHEN_TS = 1437767599.006 + WHEN = datetime.datetime.utcfromtimestamp(WHEN_TS).replace( + tzinfo=UTC) + PATH = 'projects/%s/datasets/%s/tables/%s/insertAll' % ( + self.PROJECT, self.DS_ID, self.TABLE_ID) + creds = _make_credentials() + http = object() + client = self._make_one(project=self.PROJECT, credentials=creds, + _http=http) + conn = client._connection = _Connection({}) + schema = [ + SchemaField('full_name', 'STRING', mode='REQUIRED'), + SchemaField('age', 'INTEGER', mode='REQUIRED'), + SchemaField('joined', 'TIMESTAMP', mode='NULLABLE'), + ] + table = Table(self.TABLE_REF, schema=schema) + ROWS = [ + ('Phred Phlyntstone', 32, _datetime_to_rfc3339(WHEN)), + ('Bharney Rhubble', 33, WHEN + datetime.timedelta(seconds=1)), + ('Wylma Phlyntstone', 29, WHEN + datetime.timedelta(seconds=2)), + ('Bhettye Rhubble', 27, None), + ] + + def _row_data(row): + joined = row[2] + if isinstance(row[2], datetime.datetime): + joined = _microseconds_from_datetime(joined) * 1e-6 + return {'full_name': row[0], + 'age': str(row[1]), + 'joined': joined} + + SENT = { + 'rows': [{ + 'json': _row_data(row), + 'insertId': str(i), + } for i, row in enumerate(ROWS)], + } - RESOURCE_URI = 'gs://some-bucket/js/lib.js' - PROJECT = 'PROJECT' - QUERY = 'select count(*) from persons' + with mock.patch('uuid.uuid4', side_effect=map(str, range(len(ROWS)))): + errors = client.create_rows(table, ROWS) + + self.assertEqual(len(errors), 0) + self.assertEqual(len(conn._requested), 1) + req = conn._requested[0] + self.assertEqual(req['method'], 'POST') + self.assertEqual(req['path'], '/%s' % PATH) + 
self.assertEqual(req['data'], SENT) + + def test_create_rows_w_list_of_dictionaries(self): + import datetime + from google.cloud._helpers import UTC + from google.cloud._helpers import _datetime_to_rfc3339 + from google.cloud._helpers import _microseconds_from_datetime + from google.cloud.bigquery.table import Table, SchemaField + + WHEN_TS = 1437767599.006 + WHEN = datetime.datetime.utcfromtimestamp(WHEN_TS).replace( + tzinfo=UTC) + PATH = 'projects/%s/datasets/%s/tables/%s/insertAll' % ( + self.PROJECT, self.DS_ID, self.TABLE_ID) creds = _make_credentials() http = object() - client = self._make_one(project=PROJECT, credentials=creds, _http=http) - udf_resources = [UDFResource("resourceUri", RESOURCE_URI)] - query = client.run_sync_query(QUERY, udf_resources=udf_resources) - self.assertIsInstance(query, QueryResults) - self.assertIs(query._client, client) - self.assertIsNone(query.name) - self.assertEqual(query.query, QUERY) - self.assertEqual(query.udf_resources, udf_resources) - self.assertEqual(query.query_parameters, []) - - def test_run_sync_query_w_query_parameters(self): - from google.cloud.bigquery._helpers import ScalarQueryParameter - from google.cloud.bigquery.query import QueryResults + client = self._make_one(project=self.PROJECT, credentials=creds, + _http=http) + conn = client._connection = _Connection({}) + schema = [ + SchemaField('full_name', 'STRING', mode='REQUIRED'), + SchemaField('age', 'INTEGER', mode='REQUIRED'), + SchemaField('joined', 'TIMESTAMP', mode='NULLABLE'), + ] + table = Table(self.TABLE_REF, schema=schema) + ROWS = [ + { + 'full_name': 'Phred Phlyntstone', 'age': 32, + 'joined': _datetime_to_rfc3339(WHEN) + }, + { + 'full_name': 'Bharney Rhubble', 'age': 33, + 'joined': WHEN + datetime.timedelta(seconds=1) + }, + { + 'full_name': 'Wylma Phlyntstone', 'age': 29, + 'joined': WHEN + datetime.timedelta(seconds=2) + }, + { + 'full_name': 'Bhettye Rhubble', 'age': 27, 'joined': None + }, + ] + + def _row_data(row): + joined = row['joined'] + if isinstance(joined, datetime.datetime): + row['joined'] = _microseconds_from_datetime(joined) * 1e-6 + row['age'] = str(row['age']) + return row + + SENT = { + 'rows': [{ + 'json': _row_data(row), + 'insertId': str(i), + } for i, row in enumerate(ROWS)], + } + + with mock.patch('uuid.uuid4', side_effect=map(str, range(len(ROWS)))): + errors = client.create_rows(table, ROWS) + + self.assertEqual(len(errors), 0) + self.assertEqual(len(conn._requested), 1) + req = conn._requested[0] + self.assertEqual(req['method'], 'POST') + self.assertEqual(req['path'], '/%s' % PATH) + self.assertEqual(req['data'], SENT) + + def test_create_rows_w_list_of_Rows(self): + from google.cloud.bigquery._helpers import Row + from google.cloud.bigquery.table import Table, SchemaField + + PATH = 'projects/%s/datasets/%s/tables/%s/insertAll' % ( + self.PROJECT, self.DS_ID, self.TABLE_ID) + creds = _make_credentials() + http = object() + client = self._make_one(project=self.PROJECT, credentials=creds, + _http=http) + conn = client._connection = _Connection({}) + schema = [ + SchemaField('full_name', 'STRING', mode='REQUIRED'), + SchemaField('age', 'INTEGER', mode='REQUIRED'), + ] + table = Table(self.TABLE_REF, schema=schema) + f2i = {'full_name': 0, 'age': 1} + ROWS = [ + Row(('Phred Phlyntstone', 32), f2i), + Row(('Bharney Rhubble', 33), f2i), + Row(('Wylma Phlyntstone', 29), f2i), + Row(('Bhettye Rhubble', 27), f2i), + ] + + def _row_data(row): + return {'full_name': row[0], 'age': str(row[1])} + + SENT = { + 'rows': [{ + 'json': _row_data(row), + 
'insertId': str(i), + } for i, row in enumerate(ROWS)], + } + + with mock.patch('uuid.uuid4', side_effect=map(str, range(len(ROWS)))): + errors = client.create_rows(table, ROWS) + + self.assertEqual(len(errors), 0) + self.assertEqual(len(conn._requested), 1) + req = conn._requested[0] + self.assertEqual(req['method'], 'POST') + self.assertEqual(req['path'], '/%s' % PATH) + self.assertEqual(req['data'], SENT) + + def test_create_rows_w_skip_invalid_and_ignore_unknown(self): + from google.cloud.bigquery.table import Table, SchemaField + + PATH = 'projects/%s/datasets/%s/tables/%s/insertAll' % ( + self.PROJECT, self.DS_ID, self.TABLE_ID) + RESPONSE = { + 'insertErrors': [ + {'index': 1, + 'errors': [ + {'reason': 'REASON', + 'location': 'LOCATION', + 'debugInfo': 'INFO', + 'message': 'MESSAGE'} + ]}, + ]} + creds = _make_credentials() + http = object() + client = self._make_one(project=self.PROJECT, credentials=creds, + _http=http) + conn = client._connection = _Connection(RESPONSE) + schema = [ + SchemaField('full_name', 'STRING', mode='REQUIRED'), + SchemaField('age', 'INTEGER', mode='REQUIRED'), + SchemaField('voter', 'BOOLEAN', mode='NULLABLE'), + ] + table = Table(self.TABLE_REF, schema=schema) + ROWS = [ + ('Phred Phlyntstone', 32, True), + ('Bharney Rhubble', 33, False), + ('Wylma Phlyntstone', 29, True), + ('Bhettye Rhubble', 27, True), + ] + + def _row_data(row): + return { + 'full_name': row[0], + 'age': str(row[1]), + 'voter': row[2] and 'true' or 'false', + } + + SENT = { + 'skipInvalidRows': True, + 'ignoreUnknownValues': True, + 'templateSuffix': '20160303', + 'rows': [{'insertId': index, 'json': _row_data(row)} + for index, row in enumerate(ROWS)], + } + + errors = client.create_rows( + table, + ROWS, + row_ids=[index for index, _ in enumerate(ROWS)], + skip_invalid_rows=True, + ignore_unknown_values=True, + template_suffix='20160303', + ) + + self.assertEqual(len(errors), 1) + self.assertEqual(errors[0]['index'], 1) + self.assertEqual(len(errors[0]['errors']), 1) + self.assertEqual(errors[0]['errors'][0], + RESPONSE['insertErrors'][0]['errors'][0]) + self.assertEqual(len(conn._requested), 1) + req = conn._requested[0] + self.assertEqual(req['method'], 'POST') + self.assertEqual(req['path'], '/%s' % PATH) + self.assertEqual(req['data'], SENT) + + def test_create_rows_w_repeated_fields(self): + from google.cloud.bigquery.table import Table, SchemaField + + PATH = 'projects/%s/datasets/%s/tables/%s/insertAll' % ( + self.PROJECT, self.DS_ID, self.TABLE_ID) + creds = _make_credentials() + http = object() + client = self._make_one(project=self.PROJECT, credentials=creds, + _http=http) + conn = client._connection = _Connection({}) + full_name = SchemaField('color', 'STRING', mode='REPEATED') + index = SchemaField('index', 'INTEGER', 'REPEATED') + score = SchemaField('score', 'FLOAT', 'REPEATED') + struct = SchemaField('struct', 'RECORD', mode='REPEATED', + fields=[index, score]) + table = Table(self.TABLE_REF, schema=[full_name, struct]) + ROWS = [ + (['red', 'green'], [{'index': [1, 2], 'score': [3.1415, 1.414]}]), + ] + + def _row_data(row): + return {'color': row[0], + 'struct': row[1]} + + SENT = { + 'rows': [{ + 'json': _row_data(row), + 'insertId': str(i), + } for i, row in enumerate(ROWS)], + } + + with mock.patch('uuid.uuid4', side_effect=map(str, range(len(ROWS)))): + errors = client.create_rows(table, ROWS) + + self.assertEqual(len(errors), 0) + self.assertEqual(len(conn._requested), 1) + req = conn._requested[0] + self.assertEqual(req['method'], 'POST') + 
self.assertEqual(req['path'], '/%s' % PATH) + self.assertEqual(req['data'], SENT) + + def test_create_rows_w_record_schema(self): + from google.cloud.bigquery.table import SchemaField + + PATH = 'projects/%s/datasets/%s/tables/%s/insertAll' % ( + self.PROJECT, self.DS_ID, self.TABLE_ID) + creds = _make_credentials() + http = object() + client = self._make_one(project=self.PROJECT, credentials=creds, + _http=http) + conn = client._connection = _Connection({}) + full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') + area_code = SchemaField('area_code', 'STRING', 'REQUIRED') + local_number = SchemaField('local_number', 'STRING', 'REQUIRED') + rank = SchemaField('rank', 'INTEGER', 'REQUIRED') + phone = SchemaField('phone', 'RECORD', mode='NULLABLE', + fields=[area_code, local_number, rank]) + ROWS = [ + ('Phred Phlyntstone', {'area_code': '800', + 'local_number': '555-1212', + 'rank': 1}), + ('Bharney Rhubble', {'area_code': '877', + 'local_number': '768-5309', + 'rank': 2}), + ('Wylma Phlyntstone', None), + ] + + def _row_data(row): + return {'full_name': row[0], + 'phone': row[1]} + + SENT = { + 'rows': [{ + 'json': _row_data(row), + 'insertId': str(i), + } for i, row in enumerate(ROWS)], + } + + with mock.patch('uuid.uuid4', side_effect=map(str, range(len(ROWS)))): + errors = client.create_rows(self.TABLE_REF, ROWS, + selected_fields=[full_name, phone]) + + self.assertEqual(len(errors), 0) + self.assertEqual(len(conn._requested), 1) + req = conn._requested[0] + self.assertEqual(req['method'], 'POST') + self.assertEqual(req['path'], '/%s' % PATH) + self.assertEqual(req['data'], SENT) + + def test_create_rows_errors(self): + from google.cloud.bigquery.table import Table + + ROWS = [ + ('Phred Phlyntstone', 32, True), + ('Bharney Rhubble', 33, False), + ('Wylma Phlyntstone', 29, True), + ('Bhettye Rhubble', 27, True), + ] + creds = _make_credentials() + http = object() + client = self._make_one(project=self.PROJECT, credentials=creds, + _http=http) + + # table ref with no selected fields + with self.assertRaises(ValueError): + client.create_rows(self.TABLE_REF, ROWS) + + # table with no schema + with self.assertRaises(ValueError): + client.create_rows(Table(self.TABLE_REF), ROWS) + + # neither Table nor tableReference + with self.assertRaises(TypeError): + client.create_rows(1, ROWS) + + def test_create_rows_json(self): + from google.cloud.bigquery.table import Table, SchemaField + from google.cloud.bigquery.dataset import DatasetReference PROJECT = 'PROJECT' - QUERY = 'select count(*) from persons' + DS_ID = 'DS_ID' + TABLE_ID = 'TABLE_ID' + PATH = 'projects/%s/datasets/%s/tables/%s/insertAll' % ( + PROJECT, DS_ID, TABLE_ID) creds = _make_credentials() http = object() client = self._make_one(project=PROJECT, credentials=creds, _http=http) - query_parameters = [ScalarQueryParameter('foo', 'INT64', 123)] - query = client.run_sync_query(QUERY, query_parameters=query_parameters) - self.assertIsInstance(query, QueryResults) - self.assertIs(query._client, client) - self.assertIsNone(query.name) - self.assertEqual(query.query, QUERY) - self.assertEqual(query.udf_resources, []) - self.assertEqual(query.query_parameters, query_parameters) + conn = client._connection = _Connection({}) + table_ref = DatasetReference(PROJECT, DS_ID).table(TABLE_ID) + schema = [ + SchemaField('full_name', 'STRING', mode='REQUIRED'), + SchemaField('age', 'INTEGER', mode='REQUIRED'), + SchemaField('joined', 'TIMESTAMP', mode='NULLABLE'), + ] + table = Table(table_ref, schema=schema) + ROWS = [ + { + 'full_name': 
'Phred Phlyntstone', 'age': '32', + 'joined': '2015-07-24T19:53:19.006000Z' + }, + { + 'full_name': 'Bharney Rhubble', 'age': '33', + 'joined': 1437767600.006 + }, + { + 'full_name': 'Wylma Phlyntstone', 'age': '29', + 'joined': 1437767601.006 + }, + { + 'full_name': 'Bhettye Rhubble', 'age': '27', 'joined': None + }, + ] + + SENT = { + 'rows': [{ + 'json': row, + 'insertId': str(i), + } for i, row in enumerate(ROWS)], + } + + with mock.patch('uuid.uuid4', side_effect=map(str, range(len(ROWS)))): + errors = client.create_rows_json(table, ROWS) + + self.assertEqual(len(errors), 0) + self.assertEqual(len(conn._requested), 1) + req = conn._requested[0] + self.assertEqual(req['method'], 'POST') + self.assertEqual(req['path'], '/%s' % PATH) + self.assertEqual(req['data'], SENT) + + def test_query_rows_defaults(self): + from google.api_core.page_iterator import HTTPIterator + from google.cloud.bigquery._helpers import Row + + JOB = 'job-id' + QUERY = 'SELECT COUNT(*) FROM persons' + RESOURCE = { + 'jobReference': { + 'projectId': self.PROJECT, + 'jobId': JOB, + }, + 'configuration': { + 'query': { + 'query': QUERY, + 'destinationTable': { + 'projectId': self.PROJECT, + 'datasetId': '_temp_dataset', + 'tableId': '_temp_table', + }, + }, + }, + 'status': { + 'state': 'DONE', + }, + } + RESULTS_RESOURCE = { + 'jobReference': RESOURCE['jobReference'], + 'jobComplete': True, + 'schema': { + 'fields': [ + {'name': 'field0', 'type': 'INTEGER', 'mode': 'NULLABLE'}, + ] + }, + 'totalRows': '3', + 'pageToken': 'next-page', + } + FIRST_PAGE = copy.deepcopy(RESULTS_RESOURCE) + FIRST_PAGE['rows'] = [ + {'f': [{'v': '1'}]}, + {'f': [{'v': '2'}]}, + ] + LAST_PAGE = copy.deepcopy(RESULTS_RESOURCE) + LAST_PAGE['rows'] = [ + {'f': [{'v': '3'}]}, + ] + del LAST_PAGE['pageToken'] + creds = _make_credentials() + http = object() + client = self._make_one(project=self.PROJECT, credentials=creds, + _http=http) + conn = client._connection = _Connection( + RESOURCE, RESULTS_RESOURCE, FIRST_PAGE, LAST_PAGE) + + rows_iter = client.query_rows(QUERY) + rows = list(rows_iter) + + self.assertEqual(rows, [Row((i,), {'field0': 0}) for i in (1, 2, 3)]) + self.assertIs(rows_iter.client, client) + self.assertIsInstance(rows_iter, HTTPIterator) + self.assertEqual(len(conn._requested), 4) + req = conn._requested[0] + self.assertEqual(req['method'], 'POST') + self.assertEqual(req['path'], '/projects/PROJECT/jobs') + self.assertIsInstance( + req['data']['jobReference']['jobId'], six.string_types) + + def test_query_rows_w_job_id(self): + from google.api_core.page_iterator import HTTPIterator + + JOB = 'job-id' + QUERY = 'SELECT COUNT(*) FROM persons' + RESOURCE = { + 'jobReference': { + 'projectId': self.PROJECT, + 'jobId': JOB, + }, + 'configuration': { + 'query': { + 'query': QUERY, + 'destinationTable': { + 'projectId': self.PROJECT, + 'datasetId': '_temp_dataset', + 'tableId': '_temp_table', + }, + }, + }, + 'status': { + 'state': 'DONE', + }, + } + RESULTS_RESOURCE = { + 'jobReference': RESOURCE['jobReference'], + 'jobComplete': True, + 'schema': { + 'fields': [ + {'name': 'field0', 'type': 'INTEGER', 'mode': 'NULLABLE'}, + ] + }, + 'totalRows': '0', + } + creds = _make_credentials() + http = object() + client = self._make_one(project=self.PROJECT, credentials=creds, + _http=http) + conn = client._connection = _Connection( + RESOURCE, RESULTS_RESOURCE, RESULTS_RESOURCE) + + rows_iter = client.query_rows(QUERY, job_id=JOB) + rows = list(rows_iter) + + self.assertEqual(rows, []) + self.assertIs(rows_iter.client, client) + 
self.assertIsInstance(rows_iter, HTTPIterator) + self.assertEqual(len(conn._requested), 3) + req = conn._requested[0] + self.assertEqual(req['method'], 'POST') + self.assertEqual(req['path'], '/projects/PROJECT/jobs') + self.assertEqual(req['data']['jobReference']['jobId'], JOB) + + def test_query_rows_w_job_config(self): + from google.cloud.bigquery.job import QueryJobConfig + from google.api_core.page_iterator import HTTPIterator + + JOB = 'job-id' + QUERY = 'SELECT COUNT(*) FROM persons' + RESOURCE = { + 'jobReference': { + 'projectId': self.PROJECT, + 'jobId': JOB, + }, + 'configuration': { + 'query': { + 'query': QUERY, + 'useLegacySql': True, + 'destinationTable': { + 'projectId': self.PROJECT, + 'datasetId': '_temp_dataset', + 'tableId': '_temp_table', + }, + }, + 'dryRun': True, + }, + 'status': { + 'state': 'DONE', + }, + } + RESULTS_RESOURCE = { + 'jobReference': RESOURCE['jobReference'], + 'jobComplete': True, + 'schema': { + 'fields': [ + {'name': 'field0', 'type': 'INTEGER', 'mode': 'NULLABLE'}, + ] + }, + 'totalRows': '0', + } + creds = _make_credentials() + http = object() + client = self._make_one(project=self.PROJECT, credentials=creds, + _http=http) + conn = client._connection = _Connection( + RESOURCE, RESULTS_RESOURCE, RESULTS_RESOURCE) + + job_config = QueryJobConfig() + job_config.use_legacy_sql = True + job_config.dry_run = True + rows_iter = client.query_rows(QUERY, job_id=JOB, job_config=job_config) + + self.assertIsInstance(rows_iter, HTTPIterator) + self.assertEqual(len(conn._requested), 2) + req = conn._requested[0] + configuration = req['data']['configuration'] + self.assertEqual(req['method'], 'POST') + self.assertEqual(req['path'], '/projects/PROJECT/jobs') + self.assertEqual(req['data']['jobReference']['jobId'], JOB) + self.assertEqual(configuration['query']['useLegacySql'], True) + self.assertEqual(configuration['dryRun'], True) + + def test_list_rows(self): + import datetime + from google.cloud._helpers import UTC + from google.cloud.bigquery.table import Table, SchemaField + from google.cloud.bigquery._helpers import Row + + PATH = 'projects/%s/datasets/%s/tables/%s/data' % ( + self.PROJECT, self.DS_ID, self.TABLE_ID) + WHEN_TS = 1437767599.006 + WHEN = datetime.datetime.utcfromtimestamp(WHEN_TS).replace( + tzinfo=UTC) + WHEN_1 = WHEN + datetime.timedelta(seconds=1) + WHEN_2 = WHEN + datetime.timedelta(seconds=2) + ROWS = 1234 + TOKEN = 'TOKEN' + + def _bigquery_timestamp_float_repr(ts_float): + # Preserve microsecond precision for E+09 timestamps + return '%0.15E' % (ts_float,) + + DATA = { + 'totalRows': str(ROWS), + 'pageToken': TOKEN, + 'rows': [ + {'f': [ + {'v': 'Phred Phlyntstone'}, + {'v': '32'}, + {'v': _bigquery_timestamp_float_repr(WHEN_TS)}, + ]}, + {'f': [ + {'v': 'Bharney Rhubble'}, + {'v': '33'}, + {'v': _bigquery_timestamp_float_repr(WHEN_TS + 1)}, + ]}, + {'f': [ + {'v': 'Wylma Phlyntstone'}, + {'v': '29'}, + {'v': _bigquery_timestamp_float_repr(WHEN_TS + 2)}, + ]}, + {'f': [ + {'v': 'Bhettye Rhubble'}, + {'v': None}, + {'v': None}, + ]}, + ] + } + creds = _make_credentials() + http = object() + client = self._make_one(project=self.PROJECT, credentials=creds, + _http=http) + conn = client._connection = _Connection(DATA, DATA) + full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') + age = SchemaField('age', 'INTEGER', mode='NULLABLE') + joined = SchemaField('joined', 'TIMESTAMP', mode='NULLABLE') + table = Table(self.TABLE_REF, schema=[full_name, age, joined]) + + iterator = client.list_rows(table) + page = 
six.next(iterator.pages) + rows = list(page) + total_rows = iterator.total_rows + page_token = iterator.next_page_token + + f2i = {'full_name': 0, 'age': 1, 'joined': 2} + self.assertEqual(len(rows), 4) + self.assertEqual(rows[0], Row(('Phred Phlyntstone', 32, WHEN), f2i)) + self.assertEqual(rows[1], Row(('Bharney Rhubble', 33, WHEN_1), f2i)) + self.assertEqual(rows[2], Row(('Wylma Phlyntstone', 29, WHEN_2), f2i)) + self.assertEqual(rows[3], Row(('Bhettye Rhubble', None, None), f2i)) + self.assertEqual(total_rows, ROWS) + self.assertEqual(page_token, TOKEN) + + self.assertEqual(len(conn._requested), 1) + req = conn._requested[0] + self.assertEqual(req['method'], 'GET') + self.assertEqual(req['path'], '/%s' % PATH) + self.assertEqual(req['query_params'], {}) + + def test_list_rows_query_params(self): + from google.cloud.bigquery.table import Table, SchemaField + + creds = _make_credentials() + http = object() + client = self._make_one(project=self.PROJECT, credentials=creds, + _http=http) + table = Table(self.TABLE_REF, + schema=[SchemaField('age', 'INTEGER', mode='NULLABLE')]) + tests = [ + ({}, {}), + ({'start_index': 1}, {'startIndex': 1}), + ({'max_results': 2}, {'maxResults': 2}), + ({'start_index': 1, 'max_results': 2}, + {'startIndex': 1, 'maxResults': 2}), + ] + conn = client._connection = _Connection(*len(tests)*[{}]) + for i, test in enumerate(tests): + iterator = client.list_rows(table, **test[0]) + six.next(iterator.pages) + req = conn._requested[i] + self.assertEqual(req['query_params'], test[1], + 'for kwargs %s' % test[0]) + + def test_list_rows_repeated_fields(self): + from google.cloud.bigquery.table import SchemaField + + PATH = 'projects/%s/datasets/%s/tables/%s/data' % ( + self.PROJECT, self.DS_ID, self.TABLE_ID) + ROWS = 1234 + TOKEN = 'TOKEN' + DATA = { + 'totalRows': ROWS, + 'pageToken': TOKEN, + 'rows': [ + {'f': [ + {'v': [{'v': 'red'}, {'v': 'green'}]}, + {'v': [{ + 'v': { + 'f': [ + {'v': [{'v': '1'}, {'v': '2'}]}, + {'v': [{'v': '3.1415'}, {'v': '1.414'}]}, + ]} + }]}, + ]}, + ] + } + creds = _make_credentials() + http = object() + client = self._make_one(project=self.PROJECT, credentials=creds, + _http=http) + conn = client._connection = _Connection(DATA) + color = SchemaField('color', 'STRING', mode='REPEATED') + index = SchemaField('index', 'INTEGER', 'REPEATED') + score = SchemaField('score', 'FLOAT', 'REPEATED') + struct = SchemaField('struct', 'RECORD', mode='REPEATED', + fields=[index, score]) + + iterator = client.list_rows(self.TABLE_REF, + selected_fields=[color, struct]) + page = six.next(iterator.pages) + rows = list(page) + total_rows = iterator.total_rows + page_token = iterator.next_page_token + + self.assertEqual(len(rows), 1) + self.assertEqual(rows[0][0], ['red', 'green']) + self.assertEqual(rows[0][1], [{'index': [1, 2], + 'score': [3.1415, 1.414]}]) + self.assertEqual(total_rows, ROWS) + self.assertEqual(page_token, TOKEN) + + self.assertEqual(len(conn._requested), 1) + req = conn._requested[0] + self.assertEqual(req['method'], 'GET') + self.assertEqual(req['path'], '/%s' % PATH) + + def test_list_rows_w_record_schema(self): + from google.cloud.bigquery.table import Table, SchemaField + + PATH = 'projects/%s/datasets/%s/tables/%s/data' % ( + self.PROJECT, self.DS_ID, self.TABLE_ID) + ROWS = 1234 + TOKEN = 'TOKEN' + DATA = { + 'totalRows': ROWS, + 'pageToken': TOKEN, + 'rows': [ + {'f': [ + {'v': 'Phred Phlyntstone'}, + {'v': {'f': [{'v': '800'}, {'v': '555-1212'}, {'v': 1}]}}, + ]}, + {'f': [ + {'v': 'Bharney Rhubble'}, + {'v': {'f': [{'v': 
'877'}, {'v': '768-5309'}, {'v': 2}]}}, + ]}, + {'f': [ + {'v': 'Wylma Phlyntstone'}, + {'v': None}, + ]}, + ] + } + creds = _make_credentials() + http = object() + client = self._make_one(project=self.PROJECT, credentials=creds, + _http=http) + conn = client._connection = _Connection(DATA) + full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') + area_code = SchemaField('area_code', 'STRING', 'REQUIRED') + local_number = SchemaField('local_number', 'STRING', 'REQUIRED') + rank = SchemaField('rank', 'INTEGER', 'REQUIRED') + phone = SchemaField('phone', 'RECORD', mode='NULLABLE', + fields=[area_code, local_number, rank]) + table = Table(self.TABLE_REF, schema=[full_name, phone]) + + iterator = client.list_rows(table) + page = six.next(iterator.pages) + rows = list(page) + total_rows = iterator.total_rows + page_token = iterator.next_page_token + + self.assertEqual(len(rows), 3) + self.assertEqual(rows[0][0], 'Phred Phlyntstone') + self.assertEqual(rows[0][1], {'area_code': '800', + 'local_number': '555-1212', + 'rank': 1}) + self.assertEqual(rows[1][0], 'Bharney Rhubble') + self.assertEqual(rows[1][1], {'area_code': '877', + 'local_number': '768-5309', + 'rank': 2}) + self.assertEqual(rows[2][0], 'Wylma Phlyntstone') + self.assertIsNone(rows[2][1]) + self.assertEqual(total_rows, ROWS) + self.assertEqual(page_token, TOKEN) + + self.assertEqual(len(conn._requested), 1) + req = conn._requested[0] + self.assertEqual(req['method'], 'GET') + self.assertEqual(req['path'], '/%s' % PATH) + + def test_list_rows_errors(self): + from google.cloud.bigquery.table import Table + + creds = _make_credentials() + http = object() + client = self._make_one(project=self.PROJECT, credentials=creds, + _http=http) + + # table ref with no selected fields + with self.assertRaises(ValueError): + client.list_rows(self.TABLE_REF) + + # table with no schema + with self.assertRaises(ValueError): + client.list_rows(Table(self.TABLE_REF)) + + # neither Table nor tableReference + with self.assertRaises(TypeError): + client.list_rows(1) + + def test_list_partitions(self): + RESOURCE = { + 'jobReference': { + 'projectId': self.PROJECT, + 'jobId': 'JOB_ID', + }, + 'configuration': { + 'query': { + 'query': 'q', + 'destinationTable': { + 'projectId': self.PROJECT, + 'datasetId': 'DS_ID', + 'tableId': 'TABLE_ID', + }, + }, + }, + 'status': { + 'state': 'DONE', + }, + } + RESULTS_RESOURCE = { + 'jobReference': RESOURCE['jobReference'], + 'jobComplete': True, + 'schema': { + 'fields': [ + {'name': 'partition_id', 'type': 'INTEGER', + 'mode': 'REQUIRED'}, + ] + }, + 'totalRows': '2', + 'pageToken': 'next-page', + } + FIRST_PAGE = copy.deepcopy(RESULTS_RESOURCE) + FIRST_PAGE['rows'] = [ + {'f': [{'v': 20160804}]}, + {'f': [{'v': 20160805}]}, + ] + del FIRST_PAGE['pageToken'] + creds = _make_credentials() + http = object() + client = self._make_one(project=self.PROJECT, credentials=creds, + _http=http) + client._connection = _Connection( + RESOURCE, RESULTS_RESOURCE, FIRST_PAGE) + self.assertEqual(client.list_partitions(self.TABLE_REF), + [20160804, 20160805]) + + +class Test_make_job_id(unittest.TestCase): + def _call_fut(self, job_id, prefix=None): + from google.cloud.bigquery.client import _make_job_id + + return _make_job_id(job_id, prefix=prefix) + + def test__make_job_id_wo_suffix(self): + job_id = self._call_fut('job_id') + + self.assertEqual(job_id, 'job_id') + + def test__make_job_id_w_suffix(self): + with mock.patch('uuid.uuid4', side_effect=['212345']): + job_id = self._call_fut(None, prefix='job_id') + + 
self.assertEqual(job_id, 'job_id212345') + + def test__make_random_job_id(self): + with mock.patch('uuid.uuid4', side_effect=['212345']): + job_id = self._call_fut(None) + + self.assertEqual(job_id, '212345') + + def test__make_job_id_w_job_id_overrides_prefix(self): + job_id = self._call_fut('job_id', prefix='unused_prefix') + + self.assertEqual(job_id, 'job_id') + + +class TestClientUpload(object): + # NOTE: This is a "partner" to `TestClient` meant to test some of the + # "load_table_from_file" portions of `Client`. It also uses + # `pytest`-style tests rather than `unittest`-style. + + TABLE_REF = DatasetReference( + 'project_id', 'test_dataset').table('test_table') + + @staticmethod + def _make_client(transport=None): + from google.cloud.bigquery import _http + from google.cloud.bigquery import client + + cl = client.Client(project='project_id', + credentials=_make_credentials(), + _http=transport) + cl._connection = mock.create_autospec(_http.Connection, instance=True) + return cl + + @staticmethod + def _make_response(status_code, content='', headers={}): + """Make a mock HTTP response.""" + import requests + response = requests.Response() + response.request = requests.Request( + 'POST', 'http://example.com').prepare() + response._content = content.encode('utf-8') + response.headers.update(headers) + response.status_code = status_code + return response + + @classmethod + def _make_do_upload_patch(cls, client, method, + resource={}, side_effect=None): + """Patches the low-level upload helpers.""" + if side_effect is None: + side_effect = [cls._make_response( + http_client.OK, + json.dumps(resource), + {'Content-Type': 'application/json'})] + return mock.patch.object( + client, method, side_effect=side_effect, autospec=True) + + EXPECTED_CONFIGURATION = { + 'jobReference': {'projectId': 'project_id', 'jobId': 'job_id'}, + 'configuration': { + 'load': { + 'sourceFormat': 'CSV', + 'destinationTable': { + 'projectId': 'project_id', + 'datasetId': 'test_dataset', + 'tableId': 'test_table' + } + } + } + } + + @staticmethod + def _make_file_obj(): + return io.BytesIO(b'hello, is it me you\'re looking for?') + + @staticmethod + def _make_config(): + from google.cloud.bigquery.job import LoadJobConfig + + config = LoadJobConfig() + config.source_format = 'CSV' + return config + + # High-level tests + + def test_load_table_from_file_resumable(self): + from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES + + client = self._make_client() + file_obj = self._make_file_obj() + + do_upload_patch = self._make_do_upload_patch( + client, '_do_resumable_upload', self.EXPECTED_CONFIGURATION) + with do_upload_patch as do_upload: + client.load_table_from_file(file_obj, self.TABLE_REF, + job_id='job_id', + job_config=self._make_config()) + + do_upload.assert_called_once_with( + file_obj, + self.EXPECTED_CONFIGURATION, + _DEFAULT_NUM_RETRIES) + + def test_load_table_from_file_resumable_metadata(self): + from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES + + client = self._make_client() + file_obj = self._make_file_obj() + + config = self._make_config() + config.allow_jagged_rows = False + config.allow_quoted_newlines = False + config.create_disposition = 'CREATE_IF_NEEDED' + config.encoding = 'utf8' + config.field_delimiter = ',' + config.ignore_unknown_values = False + config.max_bad_records = 0 + config.quote_character = '"' + config.skip_leading_rows = 1 + config.write_disposition = 'WRITE_APPEND' + config.null_marker = r'\N' + + expected_config = { + 'jobReference': {'projectId': 
'project_id', 'jobId': 'job_id'}, + 'configuration': { + 'load': { + 'destinationTable': { + 'projectId': self.TABLE_REF.project, + 'datasetId': self.TABLE_REF.dataset_id, + 'tableId': self.TABLE_REF.table_id, + }, + 'sourceFormat': config.source_format, + 'allowJaggedRows': config.allow_jagged_rows, + 'allowQuotedNewlines': config.allow_quoted_newlines, + 'createDisposition': config.create_disposition, + 'encoding': config.encoding, + 'fieldDelimiter': config.field_delimiter, + 'ignoreUnknownValues': config.ignore_unknown_values, + 'maxBadRecords': config.max_bad_records, + 'quote': config.quote_character, + 'skipLeadingRows': str(config.skip_leading_rows), + 'writeDisposition': config.write_disposition, + 'nullMarker': config.null_marker, + }, + }, + } + + do_upload_patch = self._make_do_upload_patch( + client, '_do_resumable_upload', expected_config) + with do_upload_patch as do_upload: + client.load_table_from_file( + file_obj, self.TABLE_REF, job_id='job_id', job_config=config) + + do_upload.assert_called_once_with( + file_obj, + expected_config, + _DEFAULT_NUM_RETRIES) + + def test_load_table_from_file_multipart(self): + from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES + + client = self._make_client() + file_obj = self._make_file_obj() + file_obj_size = 10 + config = self._make_config() + + do_upload_patch = self._make_do_upload_patch( + client, '_do_multipart_upload', self.EXPECTED_CONFIGURATION) + with do_upload_patch as do_upload: + client.load_table_from_file( + file_obj, self.TABLE_REF, job_id='job_id', job_config=config, + size=file_obj_size) + + do_upload.assert_called_once_with( + file_obj, + self.EXPECTED_CONFIGURATION, + file_obj_size, + _DEFAULT_NUM_RETRIES) + + def test_load_table_from_file_with_retries(self): + client = self._make_client() + file_obj = self._make_file_obj() + num_retries = 20 + + do_upload_patch = self._make_do_upload_patch( + client, '_do_resumable_upload', self.EXPECTED_CONFIGURATION) + with do_upload_patch as do_upload: + client.load_table_from_file( + file_obj, self.TABLE_REF, num_retries=num_retries, + job_id='job_id', job_config=self._make_config()) + + do_upload.assert_called_once_with( + file_obj, + self.EXPECTED_CONFIGURATION, + num_retries) + + def test_load_table_from_file_with_rewind(self): + client = self._make_client() + file_obj = self._make_file_obj() + file_obj.seek(2) + + with self._make_do_upload_patch( + client, '_do_resumable_upload', self.EXPECTED_CONFIGURATION): + client.load_table_from_file( + file_obj, self.TABLE_REF, rewind=True) + + assert file_obj.tell() == 0 + + def test_load_table_from_file_failure(self): + from google.resumable_media import InvalidResponse + from google.cloud import exceptions + + client = self._make_client() + file_obj = self._make_file_obj() + + response = self._make_response( + content='Someone is already in this spot.', + status_code=http_client.CONFLICT) + + do_upload_patch = self._make_do_upload_patch( + client, '_do_resumable_upload', + side_effect=InvalidResponse(response)) + + with do_upload_patch, pytest.raises(exceptions.Conflict) as exc_info: + client.load_table_from_file( + file_obj, self.TABLE_REF, rewind=True) + + assert response.text in exc_info.value.message + assert exc_info.value.errors == [] + + def test_load_table_from_file_bad_mode(self): + client = self._make_client() + file_obj = mock.Mock(spec=['mode']) + file_obj.mode = 'x' + + with pytest.raises(ValueError): + client.load_table_from_file(file_obj, self.TABLE_REF) + + # Low-level tests + + @classmethod + def 
_make_resumable_upload_responses(cls, size):
+        """Make a series of responses for a successful resumable upload."""
+        from google import resumable_media
+
+        resumable_url = 'http://test.invalid?upload_id=and-then-there-was-1'
+        initial_response = cls._make_response(
+            http_client.OK, '', {'location': resumable_url})
+        data_response = cls._make_response(
+            resumable_media.PERMANENT_REDIRECT,
+            '', {'range': 'bytes=0-{:d}'.format(size - 1)})
+        final_response = cls._make_response(
+            http_client.OK,
+            json.dumps({'size': size}),
+            {'Content-Type': 'application/json'})
+        return [initial_response, data_response, final_response]
+
+    @staticmethod
+    def _make_transport(responses=None):
+        import google.auth.transport.requests
+
+        transport = mock.create_autospec(
+            google.auth.transport.requests.AuthorizedSession, instance=True)
+        transport.request.side_effect = responses
+        return transport
+
+    def test__do_resumable_upload(self):
+        file_obj = self._make_file_obj()
+        file_obj_len = len(file_obj.getvalue())
+        transport = self._make_transport(
+            self._make_resumable_upload_responses(file_obj_len))
+        client = self._make_client(transport)
+
+        result = client._do_resumable_upload(
+            file_obj,
+            self.EXPECTED_CONFIGURATION,
+            None)
+
+        content = result.content.decode('utf-8')
+        assert json.loads(content) == {'size': file_obj_len}
+
+        # Verify that configuration data was passed in with the initial
+        # request.
+        transport.request.assert_any_call(
+            'POST',
+            mock.ANY,
+            data=json.dumps(self.EXPECTED_CONFIGURATION).encode('utf-8'),
+            headers=mock.ANY)
+
+    def test__do_multipart_upload(self):
+        transport = self._make_transport([self._make_response(http_client.OK)])
+        client = self._make_client(transport)
+        file_obj = self._make_file_obj()
+        file_obj_len = len(file_obj.getvalue())
+
+        client._do_multipart_upload(
+            file_obj,
+            self.EXPECTED_CONFIGURATION,
+            file_obj_len,
+            None)
+
+        # Verify that configuration data was passed in with the initial
+        # request.
+        request_args = transport.request.mock_calls[0][2]
+        request_data = request_args['data'].decode('utf-8')
+        request_headers = request_args['headers']
+
+        request_content = email.message_from_string(
+            'Content-Type: {}\r\n{}'.format(
+                request_headers['content-type'].decode('utf-8'),
+                request_data))
+
+        # There should be two payloads: the configuration and the binary data.
+ configuration_data = request_content.get_payload(0).get_payload() + binary_data = request_content.get_payload(1).get_payload() + + assert json.loads(configuration_data) == self.EXPECTED_CONFIGURATION + assert binary_data.encode('utf-8') == file_obj.getvalue() + + def test__do_multipart_upload_wrong_size(self): + client = self._make_client() + file_obj = self._make_file_obj() + file_obj_len = len(file_obj.getvalue()) + + with pytest.raises(ValueError): + client._do_multipart_upload( + file_obj, + {}, + file_obj_len+1, + None) class _Connection(object): + USER_AGENT = 'testing 1.2.3' + def __init__(self, *responses): self._responses = responses self._requested = [] @@ -672,4 +3231,6 @@ def api_request(self, **kw): raise NotFound('miss') response, self._responses = self._responses[0], self._responses[1:] + if isinstance(response, Exception): + raise response return response diff --git a/bigquery/tests/unit/test_dataset.py b/bigquery/tests/unit/test_dataset.py index 164f9ed0a2b4..c04d154b52da 100644 --- a/bigquery/tests/unit/test_dataset.py +++ b/bigquery/tests/unit/test_dataset.py @@ -17,22 +17,22 @@ import mock -class TestAccessGrant(unittest.TestCase): +class TestAccessEntry(unittest.TestCase): @staticmethod def _get_target_class(): - from google.cloud.bigquery.dataset import AccessGrant + from google.cloud.bigquery.dataset import AccessEntry - return AccessGrant + return AccessEntry def _make_one(self, *args, **kw): return self._get_target_class()(*args, **kw) def test_ctor_defaults(self): - grant = self._make_one('OWNER', 'userByEmail', 'phred@example.com') - self.assertEqual(grant.role, 'OWNER') - self.assertEqual(grant.entity_type, 'userByEmail') - self.assertEqual(grant.entity_id, 'phred@example.com') + entry = self._make_one('OWNER', 'userByEmail', 'phred@example.com') + self.assertEqual(entry.role, 'OWNER') + self.assertEqual(entry.entity_type, 'userByEmail') + self.assertEqual(entry.entity_id, 'phred@example.com') def test_ctor_bad_entity_type(self): with self.assertRaises(ValueError): @@ -48,10 +48,10 @@ def test_ctor_view_success(self): role = None entity_type = 'view' entity_id = object() - grant = self._make_one(role, entity_type, entity_id) - self.assertEqual(grant.role, role) - self.assertEqual(grant.entity_type, entity_type) - self.assertEqual(grant.entity_id, entity_id) + entry = self._make_one(role, entity_type, entity_id) + self.assertEqual(entry.role, role) + self.assertEqual(entry.entity_type, entity_type) + self.assertEqual(entry.entity_id, entity_id) def test_ctor_nonview_without_role(self): role = None @@ -60,34 +60,131 @@ def test_ctor_nonview_without_role(self): self._make_one(role, entity_type, None) def test___eq___role_mismatch(self): - grant = self._make_one('OWNER', 'userByEmail', 'phred@example.com') + entry = self._make_one('OWNER', 'userByEmail', 'phred@example.com') other = self._make_one('WRITER', 'userByEmail', 'phred@example.com') - self.assertNotEqual(grant, other) + self.assertNotEqual(entry, other) def test___eq___entity_type_mismatch(self): - grant = self._make_one('OWNER', 'userByEmail', 'phred@example.com') + entry = self._make_one('OWNER', 'userByEmail', 'phred@example.com') other = self._make_one('OWNER', 'groupByEmail', 'phred@example.com') - self.assertNotEqual(grant, other) + self.assertNotEqual(entry, other) def test___eq___entity_id_mismatch(self): - grant = self._make_one('OWNER', 'userByEmail', 'phred@example.com') + entry = self._make_one('OWNER', 'userByEmail', 'phred@example.com') other = self._make_one('OWNER', 'userByEmail', 
'bharney@example.com') - self.assertNotEqual(grant, other) + self.assertNotEqual(entry, other) def test___eq___hit(self): - grant = self._make_one('OWNER', 'userByEmail', 'phred@example.com') + entry = self._make_one('OWNER', 'userByEmail', 'phred@example.com') other = self._make_one('OWNER', 'userByEmail', 'phred@example.com') - self.assertEqual(grant, other) + self.assertEqual(entry, other) def test__eq___type_mismatch(self): - grant = self._make_one('OWNER', 'userByEmail', 'silly@example.com') - self.assertNotEqual(grant, object()) - self.assertEqual(grant, mock.ANY) + entry = self._make_one('OWNER', 'userByEmail', 'silly@example.com') + self.assertNotEqual(entry, object()) + self.assertEqual(entry, mock.ANY) + + +class TestDatasetReference(unittest.TestCase): + + @staticmethod + def _get_target_class(): + from google.cloud.bigquery.dataset import DatasetReference + + return DatasetReference + + def _make_one(self, *args, **kw): + return self._get_target_class()(*args, **kw) + + def test_ctor_defaults(self): + dataset_ref = self._make_one('some-project-1', 'dataset_1') + self.assertEqual(dataset_ref.project, 'some-project-1') + self.assertEqual(dataset_ref.dataset_id, 'dataset_1') + + def test_ctor_bad_args(self): + with self.assertRaises(ValueError): + self._make_one(1, 'd') + with self.assertRaises(ValueError): + self._make_one('p', 2) + + def test_table(self): + dataset_ref = self._make_one('some-project-1', 'dataset_1') + table_ref = dataset_ref.table('table_1') + self.assertEqual(table_ref.dataset_id, 'dataset_1') + self.assertEqual(table_ref.project, 'some-project-1') + self.assertEqual(table_ref.table_id, 'table_1') + + def test_to_api_repr(self): + dataset = self._make_one('project_1', 'dataset_1') + + resource = dataset.to_api_repr() + + self.assertEqual( + resource, + { + 'projectId': 'project_1', + 'datasetId': 'dataset_1', + }) + + def test_from_api_repr(self): + from google.cloud.bigquery.dataset import DatasetReference + expected = self._make_one('project_1', 'dataset_1') + + got = DatasetReference.from_api_repr( + { + 'projectId': 'project_1', + 'datasetId': 'dataset_1', + }) + + self.assertEqual(expected, got) + + def test___eq___wrong_type(self): + dataset = self._make_one('project_1', 'dataset_1') + other = object() + self.assertNotEqual(dataset, other) + self.assertEqual(dataset, mock.ANY) + + def test___eq___project_mismatch(self): + dataset = self._make_one('project_1', 'dataset_1') + other = self._make_one('project_2', 'dataset_1') + self.assertNotEqual(dataset, other) + + def test___eq___dataset_mismatch(self): + dataset = self._make_one('project_1', 'dataset_1') + other = self._make_one('project_1', 'dataset_2') + self.assertNotEqual(dataset, other) + + def test___eq___equality(self): + dataset = self._make_one('project_1', 'dataset_1') + other = self._make_one('project_1', 'dataset_1') + self.assertEqual(dataset, other) + + def test___hash__set_equality(self): + dataset1 = self._make_one('project_1', 'dataset_1') + dataset2 = self._make_one('project_1', 'dataset_2') + set_one = {dataset1, dataset2} + set_two = {dataset1, dataset2} + self.assertEqual(set_one, set_two) + + def test___hash__not_equals(self): + dataset1 = self._make_one('project_1', 'dataset_1') + dataset2 = self._make_one('project_1', 'dataset_2') + set_one = {dataset1} + set_two = {dataset2} + self.assertNotEqual(set_one, set_two) + + def test___repr__(self): + dataset = self._make_one('project1', 'dataset1') + expected = "DatasetReference('project1', 'dataset1')" + self.assertEqual(repr(dataset), 
expected) class TestDataset(unittest.TestCase): + from google.cloud.bigquery.dataset import DatasetReference + PROJECT = 'project' - DS_NAME = 'dataset-name' + DS_ID = 'dataset-id' + DS_REF = DatasetReference(PROJECT, DS_ID) @staticmethod def _get_target_class(): @@ -106,7 +203,7 @@ def _setUpConstants(self): self.WHEN = datetime.datetime.utcfromtimestamp(self.WHEN_TS).replace( tzinfo=UTC) self.ETAG = 'ETAG' - self.DS_ID = '%s:%s' % (self.PROJECT, self.DS_NAME) + self.DS_FULL_ID = '%s:%s' % (self.PROJECT, self.DS_ID) self.RESOURCE_URL = 'http://example.com/path/to/resource' def _makeResource(self): @@ -116,12 +213,13 @@ def _makeResource(self): return { 'creationTime': self.WHEN_TS * 1000, 'datasetReference': - {'projectId': self.PROJECT, 'datasetId': self.DS_NAME}, + {'projectId': self.PROJECT, 'datasetId': self.DS_ID}, 'etag': self.ETAG, - 'id': self.DS_ID, + 'id': self.DS_FULL_ID, 'lastModifiedTime': self.WHEN_TS * 1000, 'location': 'US', 'selfLink': self.RESOURCE_URL, + 'defaultTableExpirationMs': 3600, 'access': [ {'role': 'OWNER', 'userByEmail': USER_EMAIL}, {'role': 'OWNER', 'groupByEmail': GROUP_EMAIL}, @@ -129,22 +227,23 @@ def _makeResource(self): {'role': 'READER', 'specialGroup': 'projectReaders'}], } - def _verifyAccessGrants(self, access_grants, resource): - r_grants = [] - for r_grant in resource['access']: - role = r_grant.pop('role') - for entity_type, entity_id in sorted(r_grant.items()): - r_grants.append({'role': role, - 'entity_type': entity_type, - 'entity_id': entity_id}) + def _verify_access_entry(self, access_entries, resource): + r_entries = [] + for r_entry in resource['access']: + role = r_entry.pop('role') + for entity_type, entity_id in sorted(r_entry.items()): + r_entries.append({ + 'role': role, + 'entity_type': entity_type, + 'entity_id': entity_id}) - self.assertEqual(len(access_grants), len(r_grants)) - for a_grant, r_grant in zip(access_grants, r_grants): - self.assertEqual(a_grant.role, r_grant['role']) - self.assertEqual(a_grant.entity_type, r_grant['entity_type']) - self.assertEqual(a_grant.entity_id, r_grant['entity_id']) + self.assertEqual(len(access_entries), len(r_entries)) + for a_entry, r_entry in zip(access_entries, r_entries): + self.assertEqual(a_entry.role, r_entry['role']) + self.assertEqual(a_entry.entity_type, r_entry['entity_type']) + self.assertEqual(a_entry.entity_id, r_entry['entity_id']) - def _verifyReadonlyResourceProperties(self, dataset, resource): + def _verify_readonly_resource_properties(self, dataset, resource): self.assertEqual(dataset.dataset_id, self.DS_ID) @@ -165,9 +264,9 @@ def _verifyReadonlyResourceProperties(self, dataset, resource): else: self.assertIsNone(dataset.self_link) - def _verifyResourceProperties(self, dataset, resource): + def _verify_resource_properties(self, dataset, resource): - self._verifyReadonlyResourceProperties(dataset, resource) + self._verify_readonly_resource_properties(dataset, resource) if 'defaultTableExpirationMs' in resource: self.assertEqual(dataset.default_table_expiration_ms, @@ -179,23 +278,21 @@ def _verifyResourceProperties(self, dataset, resource): self.assertEqual(dataset.location, resource.get('location')) if 'access' in resource: - self._verifyAccessGrants(dataset.access_grants, resource) + self._verify_access_entry(dataset.access_entries, resource) else: - self.assertEqual(dataset.access_grants, []) + self.assertEqual(dataset.access_entries, []) def test_ctor_defaults(self): - client = _Client(self.PROJECT) - dataset = self._make_one(self.DS_NAME, client) - 
self.assertEqual(dataset.name, self.DS_NAME) - self.assertIs(dataset._client, client) - self.assertEqual(dataset.project, client.project) + dataset = self._make_one(self.DS_REF) + self.assertEqual(dataset.dataset_id, self.DS_ID) + self.assertEqual(dataset.project, self.PROJECT) self.assertEqual( dataset.path, - '/projects/%s/datasets/%s' % (self.PROJECT, self.DS_NAME)) - self.assertEqual(dataset.access_grants, []) + '/projects/%s/datasets/%s' % (self.PROJECT, self.DS_ID)) + self.assertEqual(dataset.access_entries, []) self.assertIsNone(dataset.created) - self.assertIsNone(dataset.dataset_id) + self.assertIsNone(dataset.full_dataset_id) self.assertIsNone(dataset.etag) self.assertIsNone(dataset.modified) self.assertIsNone(dataset.self_link) @@ -206,26 +303,23 @@ def test_ctor_defaults(self): self.assertIsNone(dataset.location) def test_ctor_explicit(self): - from google.cloud.bigquery.dataset import AccessGrant + from google.cloud.bigquery.dataset import DatasetReference, AccessEntry - phred = AccessGrant('OWNER', 'userByEmail', 'phred@example.com') - bharney = AccessGrant('OWNER', 'userByEmail', 'bharney@example.com') - grants = [phred, bharney] + phred = AccessEntry('OWNER', 'userByEmail', 'phred@example.com') + bharney = AccessEntry('OWNER', 'userByEmail', 'bharney@example.com') + entries = [phred, bharney] OTHER_PROJECT = 'foo-bar-123' - client = _Client(self.PROJECT) - dataset = self._make_one(self.DS_NAME, client, - access_grants=grants, - project=OTHER_PROJECT) - self.assertEqual(dataset.name, self.DS_NAME) - self.assertIs(dataset._client, client) + dataset = self._make_one(DatasetReference(OTHER_PROJECT, self.DS_ID)) + dataset.access_entries = entries + self.assertEqual(dataset.dataset_id, self.DS_ID) self.assertEqual(dataset.project, OTHER_PROJECT) self.assertEqual( dataset.path, - '/projects/%s/datasets/%s' % (OTHER_PROJECT, self.DS_NAME)) - self.assertEqual(dataset.access_grants, grants) + '/projects/%s/datasets/%s' % (OTHER_PROJECT, self.DS_ID)) + self.assertEqual(dataset.access_entries, entries) self.assertIsNone(dataset.created) - self.assertIsNone(dataset.dataset_id) + self.assertIsNone(dataset.full_dataset_id) self.assertIsNone(dataset.etag) self.assertIsNone(dataset.modified) self.assertIsNone(dataset.self_link) @@ -235,120 +329,113 @@ def test_ctor_explicit(self): self.assertIsNone(dataset.friendly_name) self.assertIsNone(dataset.location) - def test_access_grants_setter_non_list(self): - client = _Client(self.PROJECT) - dataset = self._make_one(self.DS_NAME, client) + def test_access_entries_setter_non_list(self): + dataset = self._make_one(self.DS_REF) with self.assertRaises(TypeError): - dataset.access_grants = object() + dataset.access_entries = object() - def test_access_grants_setter_invalid_field(self): - from google.cloud.bigquery.dataset import AccessGrant + def test_access_entries_setter_invalid_field(self): + from google.cloud.bigquery.dataset import AccessEntry - client = _Client(self.PROJECT) - dataset = self._make_one(self.DS_NAME, client) - phred = AccessGrant('OWNER', 'userByEmail', 'phred@example.com') + dataset = self._make_one(self.DS_REF) + phred = AccessEntry('OWNER', 'userByEmail', 'phred@example.com') with self.assertRaises(ValueError): - dataset.access_grants = [phred, object()] + dataset.access_entries = [phred, object()] - def test_access_grants_setter(self): - from google.cloud.bigquery.dataset import AccessGrant + def test_access_entries_setter(self): + from google.cloud.bigquery.dataset import AccessEntry - client = _Client(self.PROJECT) - 
dataset = self._make_one(self.DS_NAME, client) - phred = AccessGrant('OWNER', 'userByEmail', 'phred@example.com') - bharney = AccessGrant('OWNER', 'userByEmail', 'bharney@example.com') - dataset.access_grants = [phred, bharney] - self.assertEqual(dataset.access_grants, [phred, bharney]) + dataset = self._make_one(self.DS_REF) + phred = AccessEntry('OWNER', 'userByEmail', 'phred@example.com') + bharney = AccessEntry('OWNER', 'userByEmail', 'bharney@example.com') + dataset.access_entries = [phred, bharney] + self.assertEqual(dataset.access_entries, [phred, bharney]) def test_default_table_expiration_ms_setter_bad_value(self): - client = _Client(self.PROJECT) - dataset = self._make_one(self.DS_NAME, client) + dataset = self._make_one(self.DS_REF) with self.assertRaises(ValueError): dataset.default_table_expiration_ms = 'bogus' def test_default_table_expiration_ms_setter(self): - client = _Client(self.PROJECT) - dataset = self._make_one(self.DS_NAME, client) + dataset = self._make_one(self.DS_REF) dataset.default_table_expiration_ms = 12345 self.assertEqual(dataset.default_table_expiration_ms, 12345) def test_description_setter_bad_value(self): - client = _Client(self.PROJECT) - dataset = self._make_one(self.DS_NAME, client) + dataset = self._make_one(self.DS_REF) with self.assertRaises(ValueError): dataset.description = 12345 def test_description_setter(self): - client = _Client(self.PROJECT) - dataset = self._make_one(self.DS_NAME, client) + dataset = self._make_one(self.DS_REF) dataset.description = 'DESCRIPTION' self.assertEqual(dataset.description, 'DESCRIPTION') def test_friendly_name_setter_bad_value(self): - client = _Client(self.PROJECT) - dataset = self._make_one(self.DS_NAME, client) + dataset = self._make_one(self.DS_REF) with self.assertRaises(ValueError): dataset.friendly_name = 12345 def test_friendly_name_setter(self): - client = _Client(self.PROJECT) - dataset = self._make_one(self.DS_NAME, client) + dataset = self._make_one(self.DS_REF) dataset.friendly_name = 'FRIENDLY' self.assertEqual(dataset.friendly_name, 'FRIENDLY') def test_location_setter_bad_value(self): - client = _Client(self.PROJECT) - dataset = self._make_one(self.DS_NAME, client) + dataset = self._make_one(self.DS_REF) with self.assertRaises(ValueError): dataset.location = 12345 def test_location_setter(self): - client = _Client(self.PROJECT) - dataset = self._make_one(self.DS_NAME, client) + dataset = self._make_one(self.DS_REF) dataset.location = 'LOCATION' self.assertEqual(dataset.location, 'LOCATION') + def test_labels_setter(self): + dataset = self._make_one(self.DS_REF) + dataset.labels = {'color': 'green'} + self.assertEqual(dataset.labels, {'color': 'green'}) + + def test_labels_setter_bad_value(self): + dataset = self._make_one(self.DS_REF) + with self.assertRaises(ValueError): + dataset.labels = None + def test_from_api_repr_missing_identity(self): self._setUpConstants() - client = _Client(self.PROJECT) RESOURCE = {} klass = self._get_target_class() with self.assertRaises(KeyError): - klass.from_api_repr(RESOURCE, client=client) + klass.from_api_repr(RESOURCE) def test_from_api_repr_bare(self): self._setUpConstants() - client = _Client(self.PROJECT) RESOURCE = { - 'id': '%s:%s' % (self.PROJECT, self.DS_NAME), + 'id': '%s:%s' % (self.PROJECT, self.DS_ID), 'datasetReference': { 'projectId': self.PROJECT, - 'datasetId': self.DS_NAME, + 'datasetId': self.DS_ID, } } klass = self._get_target_class() - dataset = klass.from_api_repr(RESOURCE, client=client) - self.assertIs(dataset._client, client) - 
self._verifyResourceProperties(dataset, RESOURCE) + dataset = klass.from_api_repr(RESOURCE) + self._verify_resource_properties(dataset, RESOURCE) def test_from_api_repr_w_properties(self): - client = _Client(self.PROJECT) RESOURCE = self._makeResource() klass = self._get_target_class() - dataset = klass.from_api_repr(RESOURCE, client=client) - self.assertIs(dataset._client, client) - self._verifyResourceProperties(dataset, RESOURCE) + dataset = klass.from_api_repr(RESOURCE) + self._verify_resource_properties(dataset, RESOURCE) - def test__parse_access_grants_w_unknown_entity_type(self): + def test__parse_access_entries_w_unknown_entity_type(self): ACCESS = [ {'role': 'READER', 'unknown': 'UNKNOWN'}, ] - client = _Client(self.PROJECT) - dataset = self._make_one(self.DS_NAME, client=client) + dataset = self._make_one(self.DS_REF) with self.assertRaises(ValueError): - dataset._parse_access_grants(ACCESS) + dataset._parse_access_entries(ACCESS) - def test__parse_access_grants_w_extra_keys(self): + def test__parse_access_entries_w_extra_keys(self): USER_EMAIL = 'phred@example.com' ACCESS = [ { @@ -357,495 +444,16 @@ def test__parse_access_grants_w_extra_keys(self): 'userByEmail': USER_EMAIL, }, ] - client = _Client(self.PROJECT) - dataset = self._make_one(self.DS_NAME, client=client) + dataset = self._make_one(self.DS_REF) with self.assertRaises(ValueError): - dataset._parse_access_grants(ACCESS) - - def test_create_w_bound_client(self): - PATH = 'projects/%s/datasets' % self.PROJECT - RESOURCE = self._makeResource() - conn = _Connection(RESOURCE) - client = _Client(project=self.PROJECT, connection=conn) - dataset = self._make_one(self.DS_NAME, client=client) - - dataset.create() - - self.assertEqual(len(conn._requested), 1) - req = conn._requested[0] - self.assertEqual(req['method'], 'POST') - self.assertEqual(req['path'], '/%s' % PATH) - SENT = { - 'datasetReference': - {'projectId': self.PROJECT, 'datasetId': self.DS_NAME}, - } - self.assertEqual(req['data'], SENT) - self._verifyResourceProperties(dataset, RESOURCE) + dataset._parse_access_entries(ACCESS) - def test_create_w_alternate_client(self): - from google.cloud.bigquery.dataset import AccessGrant + def test_table(self): + from google.cloud.bigquery.table import TableReference - PATH = 'projects/%s/datasets' % self.PROJECT - USER_EMAIL = 'phred@example.com' - GROUP_EMAIL = 'group-name@lists.example.com' - DESCRIPTION = 'DESCRIPTION' - TITLE = 'TITLE' - RESOURCE = self._makeResource() - RESOURCE['description'] = DESCRIPTION - RESOURCE['friendlyName'] = TITLE - conn1 = _Connection() - CLIENT1 = _Client(project=self.PROJECT, connection=conn1) - conn2 = _Connection(RESOURCE) - CLIENT2 = _Client(project=self.PROJECT, connection=conn2) - dataset = self._make_one(self.DS_NAME, client=CLIENT1) - dataset.friendly_name = TITLE - dataset.description = DESCRIPTION - VIEW = { - 'projectId': 'my-proj', - 'datasetId': 'starry-skies', - 'tableId': 'northern-hemisphere', - } - dataset.access_grants = [ - AccessGrant('OWNER', 'userByEmail', USER_EMAIL), - AccessGrant('OWNER', 'groupByEmail', GROUP_EMAIL), - AccessGrant('READER', 'domain', 'foo.com'), - AccessGrant('READER', 'specialGroup', 'projectReaders'), - AccessGrant('WRITER', 'specialGroup', 'projectWriters'), - AccessGrant(None, 'view', VIEW), - ] - - dataset.create(client=CLIENT2) - - self.assertEqual(len(conn1._requested), 0) - self.assertEqual(len(conn2._requested), 1) - req = conn2._requested[0] - self.assertEqual(req['method'], 'POST') - self.assertEqual(req['path'], '/%s' % PATH) - SENT = { 
- 'datasetReference': { - 'projectId': self.PROJECT, - 'datasetId': self.DS_NAME, - }, - 'description': DESCRIPTION, - 'friendlyName': TITLE, - 'access': [ - {'role': 'OWNER', 'userByEmail': USER_EMAIL}, - {'role': 'OWNER', 'groupByEmail': GROUP_EMAIL}, - {'role': 'READER', 'domain': 'foo.com'}, - {'role': 'READER', 'specialGroup': 'projectReaders'}, - {'role': 'WRITER', 'specialGroup': 'projectWriters'}, - {'view': VIEW}, - ], - } - self.assertEqual(req['data'], SENT) - self._verifyResourceProperties(dataset, RESOURCE) - - def test_create_w_missing_output_properties(self): - # In the wild, the resource returned from 'dataset.create' sometimes - # lacks 'creationTime' / 'lastModifiedTime' - PATH = 'projects/%s/datasets' % (self.PROJECT,) - RESOURCE = self._makeResource() - del RESOURCE['creationTime'] - del RESOURCE['lastModifiedTime'] - self.WHEN = None - conn = _Connection(RESOURCE) - client = _Client(project=self.PROJECT, connection=conn) - dataset = self._make_one(self.DS_NAME, client=client) - - dataset.create() - - self.assertEqual(len(conn._requested), 1) - req = conn._requested[0] - self.assertEqual(req['method'], 'POST') - self.assertEqual(req['path'], '/%s' % PATH) - SENT = { - 'datasetReference': - {'projectId': self.PROJECT, 'datasetId': self.DS_NAME}, - } - self.assertEqual(req['data'], SENT) - self._verifyResourceProperties(dataset, RESOURCE) - - def test_exists_miss_w_bound_client(self): - PATH = 'projects/%s/datasets/%s' % (self.PROJECT, self.DS_NAME) - conn = _Connection() - client = _Client(project=self.PROJECT, connection=conn) - dataset = self._make_one(self.DS_NAME, client=client) - - self.assertFalse(dataset.exists()) - - self.assertEqual(len(conn._requested), 1) - req = conn._requested[0] - self.assertEqual(req['method'], 'GET') - self.assertEqual(req['path'], '/%s' % PATH) - self.assertEqual(req['query_params'], {'fields': 'id'}) - - def test_exists_hit_w_alternate_client(self): - PATH = 'projects/%s/datasets/%s' % (self.PROJECT, self.DS_NAME) - conn1 = _Connection() - CLIENT1 = _Client(project=self.PROJECT, connection=conn1) - conn2 = _Connection({}) - CLIENT2 = _Client(project=self.PROJECT, connection=conn2) - dataset = self._make_one(self.DS_NAME, client=CLIENT1) - - self.assertTrue(dataset.exists(client=CLIENT2)) - - self.assertEqual(len(conn1._requested), 0) - self.assertEqual(len(conn2._requested), 1) - req = conn2._requested[0] - self.assertEqual(req['method'], 'GET') - self.assertEqual(req['path'], '/%s' % PATH) - self.assertEqual(req['query_params'], {'fields': 'id'}) - - def test_reload_w_bound_client(self): - PATH = 'projects/%s/datasets/%s' % (self.PROJECT, self.DS_NAME) - RESOURCE = self._makeResource() - conn = _Connection(RESOURCE) - client = _Client(project=self.PROJECT, connection=conn) - dataset = self._make_one(self.DS_NAME, client=client) - - dataset.reload() - - self.assertEqual(len(conn._requested), 1) - req = conn._requested[0] - self.assertEqual(req['method'], 'GET') - self.assertEqual(req['path'], '/%s' % PATH) - self._verifyResourceProperties(dataset, RESOURCE) - - def test_reload_w_alternate_client(self): - PATH = 'projects/%s/datasets/%s' % (self.PROJECT, self.DS_NAME) - RESOURCE = self._makeResource() - conn1 = _Connection() - CLIENT1 = _Client(project=self.PROJECT, connection=conn1) - conn2 = _Connection(RESOURCE) - CLIENT2 = _Client(project=self.PROJECT, connection=conn2) - dataset = self._make_one(self.DS_NAME, client=CLIENT1) - - dataset.reload(client=CLIENT2) - - self.assertEqual(len(conn1._requested), 0) - 
self.assertEqual(len(conn2._requested), 1) - req = conn2._requested[0] - self.assertEqual(req['method'], 'GET') - self.assertEqual(req['path'], '/%s' % PATH) - self._verifyResourceProperties(dataset, RESOURCE) - - def test_patch_w_invalid_expiration(self): - RESOURCE = self._makeResource() - conn = _Connection(RESOURCE) - client = _Client(project=self.PROJECT, connection=conn) - dataset = self._make_one(self.DS_NAME, client=client) - - with self.assertRaises(ValueError): - dataset.patch(default_table_expiration_ms='BOGUS') - - def test_patch_w_bound_client(self): - PATH = 'projects/%s/datasets/%s' % (self.PROJECT, self.DS_NAME) - DESCRIPTION = 'DESCRIPTION' - TITLE = 'TITLE' - RESOURCE = self._makeResource() - RESOURCE['description'] = DESCRIPTION - RESOURCE['friendlyName'] = TITLE - conn = _Connection(RESOURCE) - client = _Client(project=self.PROJECT, connection=conn) - dataset = self._make_one(self.DS_NAME, client=client) - - dataset.patch(description=DESCRIPTION, friendly_name=TITLE) - - self.assertEqual(len(conn._requested), 1) - req = conn._requested[0] - self.assertEqual(req['method'], 'PATCH') - SENT = { - 'description': DESCRIPTION, - 'friendlyName': TITLE, - } - self.assertEqual(req['data'], SENT) - self.assertEqual(req['path'], '/%s' % PATH) - self._verifyResourceProperties(dataset, RESOURCE) - - def test_patch_w_alternate_client(self): - PATH = 'projects/%s/datasets/%s' % (self.PROJECT, self.DS_NAME) - DEF_TABLE_EXP = 12345 - LOCATION = 'EU' - RESOURCE = self._makeResource() - RESOURCE['defaultTableExpirationMs'] = str(DEF_TABLE_EXP) - RESOURCE['location'] = LOCATION - conn1 = _Connection() - CLIENT1 = _Client(project=self.PROJECT, connection=conn1) - conn2 = _Connection(RESOURCE) - CLIENT2 = _Client(project=self.PROJECT, connection=conn2) - dataset = self._make_one(self.DS_NAME, client=CLIENT1) - - dataset.patch(client=CLIENT2, - default_table_expiration_ms=DEF_TABLE_EXP, - location=LOCATION) - - self.assertEqual(len(conn1._requested), 0) - self.assertEqual(len(conn2._requested), 1) - req = conn2._requested[0] - self.assertEqual(req['method'], 'PATCH') - self.assertEqual(req['path'], '/%s' % PATH) - SENT = { - 'defaultTableExpirationMs': DEF_TABLE_EXP, - 'location': LOCATION, - } - self.assertEqual(req['data'], SENT) - self._verifyResourceProperties(dataset, RESOURCE) - - def test_update_w_bound_client(self): - PATH = 'projects/%s/datasets/%s' % (self.PROJECT, self.DS_NAME) - DESCRIPTION = 'DESCRIPTION' - TITLE = 'TITLE' - RESOURCE = self._makeResource() - RESOURCE['description'] = DESCRIPTION - RESOURCE['friendlyName'] = TITLE - conn = _Connection(RESOURCE) - client = _Client(project=self.PROJECT, connection=conn) - dataset = self._make_one(self.DS_NAME, client=client) - dataset.description = DESCRIPTION - dataset.friendly_name = TITLE - - dataset.update() - - self.assertEqual(len(conn._requested), 1) - req = conn._requested[0] - self.assertEqual(req['method'], 'PUT') - SENT = { - 'datasetReference': - {'projectId': self.PROJECT, 'datasetId': self.DS_NAME}, - 'description': DESCRIPTION, - 'friendlyName': TITLE, - } - self.assertEqual(req['data'], SENT) - self.assertEqual(req['path'], '/%s' % PATH) - self._verifyResourceProperties(dataset, RESOURCE) - - def test_update_w_alternate_client(self): - PATH = 'projects/%s/datasets/%s' % (self.PROJECT, self.DS_NAME) - DEF_TABLE_EXP = 12345 - LOCATION = 'EU' - RESOURCE = self._makeResource() - RESOURCE['defaultTableExpirationMs'] = 12345 - RESOURCE['location'] = LOCATION - conn1 = _Connection() - CLIENT1 = _Client(project=self.PROJECT, 
connection=conn1) - conn2 = _Connection(RESOURCE) - CLIENT2 = _Client(project=self.PROJECT, connection=conn2) - dataset = self._make_one(self.DS_NAME, client=CLIENT1) - dataset.default_table_expiration_ms = DEF_TABLE_EXP - dataset.location = LOCATION - - dataset.update(client=CLIENT2) - - self.assertEqual(len(conn1._requested), 0) - self.assertEqual(len(conn2._requested), 1) - req = conn2._requested[0] - self.assertEqual(req['method'], 'PUT') - self.assertEqual(req['path'], '/%s' % PATH) - SENT = { - 'datasetReference': - {'projectId': self.PROJECT, 'datasetId': self.DS_NAME}, - 'defaultTableExpirationMs': 12345, - 'location': 'EU', - } - self.assertEqual(req['data'], SENT) - self._verifyResourceProperties(dataset, RESOURCE) - - def test_delete_w_bound_client(self): - PATH = 'projects/%s/datasets/%s' % (self.PROJECT, self.DS_NAME) - conn = _Connection({}) - client = _Client(project=self.PROJECT, connection=conn) - dataset = self._make_one(self.DS_NAME, client=client) - - dataset.delete() - - self.assertEqual(len(conn._requested), 1) - req = conn._requested[0] - self.assertEqual(req['method'], 'DELETE') - self.assertEqual(req['path'], '/%s' % PATH) - - def test_delete_w_alternate_client(self): - PATH = 'projects/%s/datasets/%s' % (self.PROJECT, self.DS_NAME) - conn1 = _Connection() - CLIENT1 = _Client(project=self.PROJECT, connection=conn1) - conn2 = _Connection({}) - CLIENT2 = _Client(project=self.PROJECT, connection=conn2) - dataset = self._make_one(self.DS_NAME, client=CLIENT1) - - dataset.delete(client=CLIENT2) - - self.assertEqual(len(conn1._requested), 0) - self.assertEqual(len(conn2._requested), 1) - req = conn2._requested[0] - self.assertEqual(req['method'], 'DELETE') - self.assertEqual(req['path'], '/%s' % PATH) - - def test_list_tables_empty(self): - import six - - conn = _Connection({}) - client = _Client(project=self.PROJECT, connection=conn) - dataset = self._make_one(self.DS_NAME, client=client) - - iterator = dataset.list_tables() - self.assertIs(iterator.dataset, dataset) - page = six.next(iterator.pages) - tables = list(page) - token = iterator.next_page_token - - self.assertEqual(tables, []) - self.assertIsNone(token) - self.assertEqual(len(conn._requested), 1) - req = conn._requested[0] - self.assertEqual(req['method'], 'GET') - PATH = 'projects/%s/datasets/%s/tables' % (self.PROJECT, self.DS_NAME) - self.assertEqual(req['path'], '/%s' % PATH) - - def test_list_tables_defaults(self): - import six - from google.cloud.bigquery.table import Table - - TABLE_1 = 'table_one' - TABLE_2 = 'table_two' - PATH = 'projects/%s/datasets/%s/tables' % (self.PROJECT, self.DS_NAME) - TOKEN = 'TOKEN' - DATA = { - 'nextPageToken': TOKEN, - 'tables': [ - {'kind': 'bigquery#table', - 'id': '%s:%s.%s' % (self.PROJECT, self.DS_NAME, TABLE_1), - 'tableReference': {'tableId': TABLE_1, - 'datasetId': self.DS_NAME, - 'projectId': self.PROJECT}, - 'type': 'TABLE'}, - {'kind': 'bigquery#table', - 'id': '%s:%s.%s' % (self.PROJECT, self.DS_NAME, TABLE_2), - 'tableReference': {'tableId': TABLE_2, - 'datasetId': self.DS_NAME, - 'projectId': self.PROJECT}, - 'type': 'TABLE'}, - ] - } - - conn = _Connection(DATA) - client = _Client(project=self.PROJECT, connection=conn) - dataset = self._make_one(self.DS_NAME, client=client) - - iterator = dataset.list_tables() - self.assertIs(iterator.dataset, dataset) - page = six.next(iterator.pages) - tables = list(page) - token = iterator.next_page_token - - self.assertEqual(len(tables), len(DATA['tables'])) - for found, expected in zip(tables, DATA['tables']): - 
self.assertIsInstance(found, Table) - self.assertEqual(found.table_id, expected['id']) - self.assertEqual(found.table_type, expected['type']) - self.assertEqual(token, TOKEN) - - self.assertEqual(len(conn._requested), 1) - req = conn._requested[0] - self.assertEqual(req['method'], 'GET') - self.assertEqual(req['path'], '/%s' % PATH) - - def test_list_tables_explicit(self): - import six - from google.cloud.bigquery.table import Table - - TABLE_1 = 'table_one' - TABLE_2 = 'table_two' - PATH = 'projects/%s/datasets/%s/tables' % (self.PROJECT, self.DS_NAME) - TOKEN = 'TOKEN' - DATA = { - 'tables': [ - {'kind': 'bigquery#dataset', - 'id': '%s:%s.%s' % (self.PROJECT, self.DS_NAME, TABLE_1), - 'tableReference': {'tableId': TABLE_1, - 'datasetId': self.DS_NAME, - 'projectId': self.PROJECT}, - 'type': 'TABLE'}, - {'kind': 'bigquery#dataset', - 'id': '%s:%s.%s' % (self.PROJECT, self.DS_NAME, TABLE_2), - 'tableReference': {'tableId': TABLE_2, - 'datasetId': self.DS_NAME, - 'projectId': self.PROJECT}, - 'type': 'TABLE'}, - ] - } - - conn = _Connection(DATA) - client = _Client(project=self.PROJECT, connection=conn) - dataset = self._make_one(self.DS_NAME, client=client) - - iterator = dataset.list_tables(max_results=3, page_token=TOKEN) - self.assertIs(iterator.dataset, dataset) - page = six.next(iterator.pages) - tables = list(page) - token = iterator.next_page_token - - self.assertEqual(len(tables), len(DATA['tables'])) - for found, expected in zip(tables, DATA['tables']): - self.assertIsInstance(found, Table) - self.assertEqual(found.table_id, expected['id']) - self.assertEqual(found.table_type, expected['type']) - self.assertIsNone(token) - - self.assertEqual(len(conn._requested), 1) - req = conn._requested[0] - self.assertEqual(req['method'], 'GET') - self.assertEqual(req['path'], '/%s' % PATH) - self.assertEqual(req['query_params'], - {'maxResults': 3, 'pageToken': TOKEN}) - - def test_table_wo_schema(self): - from google.cloud.bigquery.table import Table - - conn = _Connection({}) - client = _Client(project=self.PROJECT, connection=conn) - dataset = self._make_one(self.DS_NAME, client=client) - table = dataset.table('table_name') - self.assertIsInstance(table, Table) - self.assertEqual(table.name, 'table_name') - self.assertIs(table._dataset, dataset) - self.assertEqual(table.schema, []) - - def test_table_w_schema(self): - from google.cloud.bigquery.schema import SchemaField - from google.cloud.bigquery.table import Table - - conn = _Connection({}) - client = _Client(project=self.PROJECT, connection=conn) - dataset = self._make_one(self.DS_NAME, client=client) - full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') - age = SchemaField('age', 'INTEGER', mode='REQUIRED') - table = dataset.table('table_name', schema=[full_name, age]) - self.assertIsInstance(table, Table) - self.assertEqual(table.name, 'table_name') - self.assertIs(table._dataset, dataset) - self.assertEqual(table.schema, [full_name, age]) - - -class _Client(object): - - def __init__(self, project='project', connection=None): - self.project = project - self._connection = connection - - -class _Connection(object): - - def __init__(self, *responses): - self._responses = responses - self._requested = [] - - def api_request(self, **kw): - from google.cloud.exceptions import NotFound - - self._requested.append(kw) - - try: - response, self._responses = self._responses[0], self._responses[1:] - except IndexError: - raise NotFound('miss') - else: - return response + dataset = self._make_one(self.DS_REF) + table = 
dataset.table('table_id') + self.assertIsInstance(table, TableReference) + self.assertEqual(table.table_id, 'table_id') + self.assertEqual(table.dataset_id, self.DS_ID) + self.assertEqual(table.project, self.PROJECT) diff --git a/bigquery/tests/unit/test_dbapi_cursor.py b/bigquery/tests/unit/test_dbapi_cursor.py index be327a8962a2..a16b7b47ee3f 100644 --- a/bigquery/tests/unit/test_dbapi_cursor.py +++ b/bigquery/tests/unit/test_dbapi_cursor.py @@ -30,37 +30,45 @@ def _make_one(self, *args, **kw): def _mock_client( self, rows=None, schema=None, num_dml_affected_rows=None): from google.cloud.bigquery import client + + if rows is None: + total_rows = 0 + else: + total_rows = len(rows) + mock_client = mock.create_autospec(client.Client) - mock_client.run_async_query.return_value = self._mock_job( - rows=rows, schema=schema, + mock_client.query.return_value = self._mock_job( + total_rows=total_rows, + schema=schema, num_dml_affected_rows=num_dml_affected_rows) + mock_client.list_rows.return_value = rows return mock_client def _mock_job( - self, rows=None, schema=None, num_dml_affected_rows=None): + self, total_rows=0, schema=None, num_dml_affected_rows=None): from google.cloud.bigquery import job mock_job = mock.create_autospec(job.QueryJob) mock_job.error_result = None mock_job.state = 'DONE' mock_job.result.return_value = mock_job + + if num_dml_affected_rows is None: + mock_job.statement_type = None # API sends back None for SELECT + else: + mock_job.statement_type = 'UPDATE' + mock_job.query_results.return_value = self._mock_results( - rows=rows, schema=schema, + total_rows=total_rows, schema=schema, num_dml_affected_rows=num_dml_affected_rows) return mock_job def _mock_results( - self, rows=None, schema=None, num_dml_affected_rows=None): + self, total_rows=0, schema=None, num_dml_affected_rows=None): from google.cloud.bigquery import query mock_results = mock.create_autospec(query.QueryResults) mock_results.schema = schema mock_results.num_dml_affected_rows = num_dml_affected_rows - - if rows is None: - mock_results.total_rows = 0 - else: - mock_results.total_rows = len(rows) - - mock_results.fetch_data.return_value = rows + mock_results.total_rows = total_rows return mock_results def test_ctor(self): @@ -177,7 +185,9 @@ def test_execute_custom_job_id(self): connection = connect(client) cursor = connection.cursor() cursor.execute('SELECT 1;', job_id='foo') - self.assertEqual(client.run_async_query.mock_calls[0][1][0], 'foo') + args, kwargs = client.query.call_args + self.assertEqual(args[0], 'SELECT 1;') + self.assertEqual(kwargs['job_id'], 'foo') def test_execute_w_dml(self): from google.cloud.bigquery.dbapi import connect @@ -185,8 +195,10 @@ def test_execute_w_dml(self): self._mock_client(rows=[], num_dml_affected_rows=12)) cursor = connection.cursor() cursor.execute('DELETE FROM UserSessions WHERE user_id = \'test\';') + rows = cursor.fetchall() self.assertIsNone(cursor.description) self.assertEqual(cursor.rowcount, 12) + self.assertEqual(rows, []) def test_execute_w_query(self): from google.cloud.bigquery.schema import SchemaField @@ -239,7 +251,7 @@ def test_execute_raises_if_result_raises(self): job = mock.create_autospec(job.QueryJob) job.result.side_effect = google.cloud.exceptions.GoogleCloudError('') client = mock.create_autospec(client.Client) - client.run_async_query.return_value = job + client.query.return_value = job connection = connect(client) cursor = connection.cursor() diff --git a/bigquery/tests/unit/test_external_config.py b/bigquery/tests/unit/test_external_config.py 
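(Editor's note on the new ``test_external_config.py`` added below: those tests round-trip ``ExternalConfig`` between its JSON API representation and its Python properties. A minimal sketch of the pattern they exercise, assuming only the calls shown in the tests — ``from_api_repr()``, ``to_api_repr()``, ``source_format``, and ``options`` — with an illustrative bucket URI that is not taken from the tests:)

.. code:: python

    from google.cloud.bigquery.external_config import ExternalConfig

    # Resource shape mirrors what the tests below feed to from_api_repr();
    # the bucket URI is a placeholder for illustration only.
    resource = {
        'sourceFormat': 'CSV',
        'sourceUris': ['gs://bucket/data.csv'],
        'csvOptions': {'skipLeadingRows': '1'},
    }

    ec = ExternalConfig.from_api_repr(resource)
    assert ec.source_format == 'CSV'
    assert ec.options.skip_leading_rows == 1   # API sends strings; parsed to int
    assert ec.to_api_repr() == resource        # round-trip is lossless

(The tests assert exactly this kind of lossless round-trip for CSV, Google Sheets, and Bigtable options.)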
new file mode 100644 index 000000000000..b7887428606d --- /dev/null +++ b/bigquery/tests/unit/test_external_config.py @@ -0,0 +1,212 @@ +# Copyright 2017 Google Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import base64 +import copy +import unittest + +from google.cloud.bigquery.external_config import ExternalConfig + + +class TestExternalConfig(unittest.TestCase): + + SOURCE_URIS = ['gs://foo', 'gs://bar'] + + BASE_RESOURCE = { + 'sourceFormat': '', + 'sourceUris': SOURCE_URIS, + 'maxBadRecords': 17, + 'autodetect': True, + 'ignoreUnknownValues': False, + 'compression': 'compression', + } + + def test_api_repr_base(self): + from google.cloud.bigquery.schema import SchemaField + + resource = copy.deepcopy(self.BASE_RESOURCE) + ec = ExternalConfig.from_api_repr(resource) + self._verify_base(ec) + self.assertEqual(ec.schema, []) + self.assertIsNone(ec.options) + + got_resource = ec.to_api_repr() + self.assertEqual(got_resource, self.BASE_RESOURCE) + + resource = _copy_and_update(self.BASE_RESOURCE, { + 'schema': { + 'fields': [ + { + 'name': 'full_name', + 'type': 'STRING', + 'mode': 'REQUIRED', + }, + ], + }, + }) + ec = ExternalConfig.from_api_repr(resource) + self._verify_base(ec) + self.assertEqual(ec.schema, + [SchemaField('full_name', 'STRING', mode='REQUIRED')]) + self.assertIsNone(ec.options) + + got_resource = ec.to_api_repr() + self.assertEqual(got_resource, resource) + + def _verify_base(self, ec): + self.assertEqual(ec.autodetect, True) + self.assertEqual(ec.compression, 'compression') + self.assertEqual(ec.ignore_unknown_values, False) + self.assertEqual(ec.max_bad_records, 17) + self.assertEqual(ec.source_uris, self.SOURCE_URIS) + + def test_to_api_repr_source_format(self): + ec = ExternalConfig('CSV') + got = ec.to_api_repr() + want = {'sourceFormat': 'CSV'} + self.assertEqual(got, want) + + def test_api_repr_sheets(self): + from google.cloud.bigquery.external_config import GoogleSheetsOptions + + resource = _copy_and_update(self.BASE_RESOURCE, { + 'sourceFormat': 'GOOGLE_SHEETS', + 'googleSheetsOptions': {'skipLeadingRows': '123'}, + }) + + ec = ExternalConfig.from_api_repr(resource) + + self._verify_base(ec) + self.assertEqual(ec.source_format, 'GOOGLE_SHEETS') + self.assertIsInstance(ec.options, GoogleSheetsOptions) + self.assertEqual(ec.options.skip_leading_rows, 123) + + got_resource = ec.to_api_repr() + + self.assertEqual(got_resource, resource) + + del resource['googleSheetsOptions']['skipLeadingRows'] + ec = ExternalConfig.from_api_repr(resource) + self.assertIsNone(ec.options.skip_leading_rows) + got_resource = ec.to_api_repr() + self.assertEqual(got_resource, resource) + + def test_api_repr_csv(self): + from google.cloud.bigquery.external_config import CSVOptions + + resource = _copy_and_update(self.BASE_RESOURCE, { + 'sourceFormat': 'CSV', + 'csvOptions': { + 'fieldDelimiter': 'fieldDelimiter', + 'skipLeadingRows': '123', + 'quote': 'quote', + 'allowQuotedNewlines': True, + 'allowJaggedRows': False, + 'encoding': 'encoding', + }, + }) + + ec = 
ExternalConfig.from_api_repr(resource) + + self._verify_base(ec) + self.assertEqual(ec.source_format, 'CSV') + self.assertIsInstance(ec.options, CSVOptions) + self.assertEqual(ec.options.field_delimiter, 'fieldDelimiter') + self.assertEqual(ec.options.skip_leading_rows, 123) + self.assertEqual(ec.options.quote_character, 'quote') + self.assertEqual(ec.options.allow_quoted_newlines, True) + self.assertEqual(ec.options.allow_jagged_rows, False) + self.assertEqual(ec.options.encoding, 'encoding') + + got_resource = ec.to_api_repr() + + self.assertEqual(got_resource, resource) + + del resource['csvOptions']['skipLeadingRows'] + ec = ExternalConfig.from_api_repr(resource) + self.assertIsNone(ec.options.skip_leading_rows) + got_resource = ec.to_api_repr() + self.assertEqual(got_resource, resource) + + def test_api_repr_bigtable(self): + from google.cloud.bigquery.external_config import BigtableOptions + from google.cloud.bigquery.external_config import BigtableColumnFamily + + QUALIFIER_ENCODED = base64.standard_b64encode(b'q').decode('ascii') + resource = _copy_and_update(self.BASE_RESOURCE, { + 'sourceFormat': 'BIGTABLE', + 'bigtableOptions': { + 'ignoreUnspecifiedColumnFamilies': True, + 'readRowkeyAsString': False, + 'columnFamilies': [ + { + 'familyId': 'familyId', + 'type': 'type', + 'encoding': 'encoding', + 'columns': [ + { + 'qualifierString': 'q', + 'fieldName': 'fieldName1', + 'type': 'type1', + 'encoding': 'encoding1', + 'onlyReadLatest': True, + }, + { + 'qualifierEncoded': QUALIFIER_ENCODED, + 'fieldName': 'fieldName2', + 'type': 'type2', + 'encoding': 'encoding2', + }, + + ], + 'onlyReadLatest': False, + } + ], + }, + }) + + ec = ExternalConfig.from_api_repr(resource) + + self._verify_base(ec) + self.assertEqual(ec.source_format, 'BIGTABLE') + self.assertIsInstance(ec.options, BigtableOptions) + self.assertEqual(ec.options.ignore_unspecified_column_families, True) + self.assertEqual(ec.options.read_rowkey_as_string, False) + self.assertEqual(len(ec.options.column_families), 1) + fam1 = ec.options.column_families[0] + self.assertIsInstance(fam1, BigtableColumnFamily) + self.assertEqual(fam1.family_id, 'familyId') + self.assertEqual(fam1.type_, 'type') + self.assertEqual(fam1.encoding, 'encoding') + self.assertEqual(len(fam1.columns), 2) + col1 = fam1.columns[0] + self.assertEqual(col1.qualifier_string, 'q') + self.assertEqual(col1.field_name, 'fieldName1') + self.assertEqual(col1.type_, 'type1') + self.assertEqual(col1.encoding, 'encoding1') + col2 = ec.options.column_families[0].columns[1] + self.assertEqual(col2.qualifier_encoded, b'q') + self.assertEqual(col2.field_name, 'fieldName2') + self.assertEqual(col2.type_, 'type2') + self.assertEqual(col2.encoding, 'encoding2') + + got_resource = ec.to_api_repr() + + self.assertEqual(got_resource, resource) + + +def _copy_and_update(d, u): + d = copy.deepcopy(d) + d.update(u) + return d diff --git a/bigquery/tests/unit/test_job.py b/bigquery/tests/unit/test_job.py index ab08701d352a..7c89a9ba7e01 100644 --- a/bigquery/tests/unit/test_job.py +++ b/bigquery/tests/unit/test_job.py @@ -17,10 +17,53 @@ from six.moves import http_client import unittest +from google.cloud.bigquery.job import ExtractJobConfig, CopyJobConfig +from google.cloud.bigquery.job import LoadJobConfig +from google.cloud.bigquery.dataset import DatasetReference + +import mock + + +def _make_credentials(): + import google.auth.credentials + + return mock.Mock(spec=google.auth.credentials.Credentials) + + +def _make_client(project='test-project', connection=None): + from 
google.cloud.bigquery.client import Client + + if connection is None: + connection = _Connection() + + client = Client( + project=project, credentials=_make_credentials(), _http=object()) + client._connection = connection + return client + + +class Test__int_or_none(unittest.TestCase): + + def _call_fut(self, *args, **kwargs): + from google.cloud.bigquery import job + + return job._int_or_none(*args, **kwargs) + + def test_w_int(self): + self.assertEqual(self._call_fut(13), 13) + + def test_w_none(self): + self.assertIsNone(self._call_fut(None)) + + def test_w_str(self): + self.assertEqual(self._call_fut('13'), 13) + class Test__error_result_to_exception(unittest.TestCase): + def _call_fut(self, *args, **kwargs): from google.cloud.bigquery import job + return job._error_result_to_exception(*args, **kwargs) def test_simple(self): @@ -40,11 +83,16 @@ def test_missing_reason(self): class _Base(object): + from google.cloud.bigquery.dataset import DatasetReference + from google.cloud.bigquery.table import TableReference + PROJECT = 'project' SOURCE1 = 'http://example.com/source1.csv' - DS_NAME = 'datset_name' - TABLE_NAME = 'table_name' - JOB_NAME = 'job_name' + DS_ID = 'datset_id' + DS_REF = DatasetReference(PROJECT, DS_ID) + TABLE_ID = 'table_id' + TABLE_REF = TableReference(DS_REF, TABLE_ID) + JOB_ID = 'JOB_ID' def _make_one(self, *args, **kw): return self._get_target_class()(*args, **kw) @@ -57,10 +105,15 @@ def _setUpConstants(self): self.WHEN = datetime.datetime.utcfromtimestamp(self.WHEN_TS).replace( tzinfo=UTC) self.ETAG = 'ETAG' - self.JOB_ID = '%s:%s' % (self.PROJECT, self.JOB_NAME) + self.FULL_JOB_ID = '%s:%s' % (self.PROJECT, self.JOB_ID) self.RESOURCE_URL = 'http://example.com/path/to/resource' self.USER_EMAIL = 'phred@example.com' + def _table_ref(self, table_id): + from google.cloud.bigquery.table import TableReference + + return TableReference(self.DS_REF, table_id) + def _makeResource(self, started=False, ended=False): self._setUpConstants() resource = { @@ -74,10 +127,10 @@ def _makeResource(self, started=False, ended=False): } }, 'etag': self.ETAG, - 'id': self.JOB_ID, + 'id': self.FULL_JOB_ID, 'jobReference': { 'projectId': self.PROJECT, - 'jobId': self.JOB_NAME, + 'jobId': self.JOB_ID, }, 'selfLink': self.RESOURCE_URL, 'user_email': self.USER_EMAIL, @@ -89,6 +142,13 @@ def _makeResource(self, started=False, ended=False): if ended: resource['statistics']['endTime'] = (self.WHEN_TS + 1000) * 1000 + if self.JOB_TYPE == 'query': + resource['configuration']['query']['destinationTable'] = { + 'projectId': self.PROJECT, + 'datasetId': '_temp_dataset', + 'tableId': '_temp_table', + } + return resource def _verifyInitialReadonlyProperties(self, job): @@ -143,31 +203,31 @@ def _verifyReadonlyResourceProperties(self, job, resource): self.assertIsNone(job.user_email) -class TestLoadTableFromStorageJob(unittest.TestCase, _Base): +class TestLoadJob(unittest.TestCase, _Base): JOB_TYPE = 'load' @staticmethod def _get_target_class(): - from google.cloud.bigquery.job import LoadTableFromStorageJob + from google.cloud.bigquery.job import LoadJob - return LoadTableFromStorageJob + return LoadJob def _setUpConstants(self): - super(TestLoadTableFromStorageJob, self)._setUpConstants() + super(TestLoadJob, self)._setUpConstants() self.INPUT_FILES = 2 self.INPUT_BYTES = 12345 self.OUTPUT_BYTES = 23456 self.OUTPUT_ROWS = 345 def _makeResource(self, started=False, ended=False): - resource = super(TestLoadTableFromStorageJob, self)._makeResource( + resource = super(TestLoadJob, self)._makeResource( 
started, ended) config = resource['configuration']['load'] config['sourceUris'] = [self.SOURCE1] config['destinationTable'] = { 'projectId': self.PROJECT, - 'datasetId': self.DS_NAME, - 'tableId': self.TABLE_NAME, + 'datasetId': self.DS_ID, + 'tableId': self.TABLE_ID, } if ended: @@ -235,8 +295,8 @@ def _verifyResourceProperties(self, job, resource): table_ref = config['destinationTable'] self.assertEqual(job.destination.project, table_ref['projectId']) - self.assertEqual(job.destination.dataset_name, table_ref['datasetId']) - self.assertEqual(job.destination.name, table_ref['tableId']) + self.assertEqual(job.destination.dataset_id, table_ref['datasetId']) + self.assertEqual(job.destination.table_id, table_ref['tableId']) if 'fieldDelimiter' in config: self.assertEqual(job.field_delimiter, @@ -259,22 +319,22 @@ def _verifyResourceProperties(self, job, resource): else: self.assertIsNone(job.quote_character) if 'skipLeadingRows' in config: - self.assertEqual(job.skip_leading_rows, + self.assertEqual(str(job.skip_leading_rows), config['skipLeadingRows']) else: self.assertIsNone(job.skip_leading_rows) def test_ctor(self): - client = _Client(self.PROJECT) - table = _Table() - job = self._make_one(self.JOB_NAME, table, [self.SOURCE1], client) - self.assertIs(job.destination, table) + client = _make_client(project=self.PROJECT) + job = self._make_one(self.JOB_ID, [self.SOURCE1], self.TABLE_REF, + client) + self.assertIs(job.destination, self.TABLE_REF) self.assertEqual(list(job.source_uris), [self.SOURCE1]) self.assertIs(job._client, client) self.assertEqual(job.job_type, self.JOB_TYPE) self.assertEqual( job.path, - '/projects/%s/jobs/%s' % (self.PROJECT, self.JOB_NAME)) + '/projects/%s/jobs/%s' % (self.PROJECT, self.JOB_ID)) self.assertEqual(job.schema, []) self._verifyInitialReadonlyProperties(job) @@ -300,25 +360,26 @@ def test_ctor(self): self.assertIsNone(job.source_format) self.assertIsNone(job.write_disposition) - def test_ctor_w_schema(self): + def test_ctor_w_config(self): from google.cloud.bigquery.schema import SchemaField - client = _Client(self.PROJECT) - table = _Table() + client = _make_client(project=self.PROJECT) full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') age = SchemaField('age', 'INTEGER', mode='REQUIRED') - job = self._make_one(self.JOB_NAME, table, [self.SOURCE1], client, - schema=[full_name, age]) + config = LoadJobConfig() + config.schema = [full_name, age] + job = self._make_one(self.JOB_ID, [self.SOURCE1], self.TABLE_REF, + client, config) self.assertEqual(job.schema, [full_name, age]) def test_done(self): - client = _Client(self.PROJECT) + client = _make_client(project=self.PROJECT) resource = self._makeResource(ended=True) job = self._get_target_class().from_api_repr(resource, client) self.assertTrue(job.done()) def test_result(self): - client = _Client(self.PROJECT) + client = _make_client(project=self.PROJECT) resource = self._makeResource(ended=True) job = self._get_target_class().from_api_repr(resource, client) @@ -326,15 +387,16 @@ def test_result(self): self.assertIs(result, job) - def test_result_invokes_begins(self): + def test_result_invokes_begin(self): begun_resource = self._makeResource() done_resource = copy.deepcopy(begun_resource) done_resource['status'] = {'state': 'DONE'} connection = _Connection(begun_resource, done_resource) - client = _Client(self.PROJECT, connection=connection) - table = _Table() - job = self._make_one(self.JOB_NAME, table, [self.SOURCE1], client) + client = _make_client(self.PROJECT) + client._connection = 
connection + job = self._make_one(self.JOB_ID, [self.SOURCE1], self.TABLE_REF, + client) job.result() self.assertEqual(len(connection._requested), 2) @@ -343,67 +405,26 @@ def test_result_invokes_begins(self): self.assertEqual(reload_request['method'], 'GET') def test_schema_setter_non_list(self): - client = _Client(self.PROJECT) - table = _Table() - job = self._make_one(self.JOB_NAME, table, [self.SOURCE1], client) + config = LoadJobConfig() with self.assertRaises(TypeError): - job.schema = object() + config.schema = object() def test_schema_setter_invalid_field(self): from google.cloud.bigquery.schema import SchemaField - client = _Client(self.PROJECT) - table = _Table() - job = self._make_one(self.JOB_NAME, table, [self.SOURCE1], client) + config = LoadJobConfig() full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') with self.assertRaises(ValueError): - job.schema = [full_name, object()] + config.schema = [full_name, object()] def test_schema_setter(self): from google.cloud.bigquery.schema import SchemaField - client = _Client(self.PROJECT) - table = _Table() - job = self._make_one(self.JOB_NAME, table, [self.SOURCE1], client) + config = LoadJobConfig() full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') age = SchemaField('age', 'INTEGER', mode='REQUIRED') - job.schema = [full_name, age] - self.assertEqual(job.schema, [full_name, age]) - - def test_schema_setter_w_autodetect(self): - from google.cloud.bigquery.schema import SchemaField - - client = _Client(self.PROJECT) - table = _Table() - full_name = SchemaField('full_name', 'STRING') - job = self._make_one(self.JOB_NAME, table, [self.SOURCE1], client) - job.autodetect = False - job.schema = [full_name] - self.assertEqual(job.schema, [full_name]) - - job = self._make_one(self.JOB_NAME, table, [self.SOURCE1], client) - job.autodetect = True - with self.assertRaises(ValueError): - job.schema = [full_name] - - def test_autodetect_setter_w_schema(self): - from google.cloud.bigquery.schema import SchemaField - - client = _Client(self.PROJECT) - table = _Table() - full_name = SchemaField('full_name', 'STRING') - job = self._make_one(self.JOB_NAME, table, [self.SOURCE1], client) - - job.autodetect = True - job.schema = [] - self.assertEqual(job.schema, []) - - job.autodetect = False - job.schema = [full_name] - self.assertEqual(job.autodetect, False) - - with self.assertRaises(ValueError): - job.autodetect = True + config.schema = [full_name, age] + self.assertEqual(config.schema, [full_name, age]) def test_props_set_by_server(self): import datetime @@ -413,20 +434,20 @@ def test_props_set_by_server(self): CREATED = datetime.datetime(2015, 8, 11, 12, 13, 22, tzinfo=UTC) STARTED = datetime.datetime(2015, 8, 11, 13, 47, 15, tzinfo=UTC) ENDED = datetime.datetime(2015, 8, 11, 14, 47, 15, tzinfo=UTC) - JOB_ID = '%s:%s' % (self.PROJECT, self.JOB_NAME) + FULL_JOB_ID = '%s:%s' % (self.PROJECT, self.JOB_ID) URL = 'http://example.com/projects/%s/jobs/%s' % ( - self.PROJECT, self.JOB_NAME) + self.PROJECT, self.JOB_ID) EMAIL = 'phred@example.com' ERROR_RESULT = {'debugInfo': 'DEBUG', 'location': 'LOCATION', 'message': 'MESSAGE', 'reason': 'REASON'} - client = _Client(self.PROJECT) + client = _make_client(project=self.PROJECT) table = _Table() - job = self._make_one(self.JOB_NAME, table, [self.SOURCE1], client) + job = self._make_one(self.JOB_ID, [self.SOURCE1], table, client) job._properties['etag'] = 'ETAG' - job._properties['id'] = JOB_ID + job._properties['id'] = FULL_JOB_ID job._properties['selfLink'] = URL 
job._properties['user_email'] = EMAIL @@ -469,7 +490,7 @@ def test_props_set_by_server(self): def test_from_api_repr_missing_identity(self): self._setUpConstants() - client = _Client(self.PROJECT) + client = _make_client(project=self.PROJECT) RESOURCE = {} klass = self._get_target_class() with self.assertRaises(KeyError): @@ -477,12 +498,12 @@ def test_from_api_repr_missing_identity(self): def test_from_api_repr_missing_config(self): self._setUpConstants() - client = _Client(self.PROJECT) + client = _make_client(project=self.PROJECT) RESOURCE = { - 'id': '%s:%s' % (self.PROJECT, self.DS_NAME), + 'id': '%s:%s' % (self.PROJECT, self.JOB_ID), 'jobReference': { 'projectId': self.PROJECT, - 'jobId': self.JOB_NAME, + 'jobId': self.JOB_ID, } } klass = self._get_target_class() @@ -491,20 +512,20 @@ def test_from_api_repr_missing_config(self): def test_from_api_repr_bare(self): self._setUpConstants() - client = _Client(self.PROJECT) + client = _make_client(project=self.PROJECT) RESOURCE = { - 'id': self.JOB_ID, + 'id': self.FULL_JOB_ID, 'jobReference': { 'projectId': self.PROJECT, - 'jobId': self.JOB_NAME, + 'jobId': self.JOB_ID, }, 'configuration': { 'load': { 'sourceUris': [self.SOURCE1], 'destinationTable': { 'projectId': self.PROJECT, - 'datasetId': self.DS_NAME, - 'tableId': self.TABLE_NAME, + 'datasetId': self.DS_ID, + 'tableId': self.TABLE_ID, }, } }, @@ -515,22 +536,24 @@ def test_from_api_repr_bare(self): self._verifyResourceProperties(job, RESOURCE) def test_from_api_repr_w_properties(self): - client = _Client(self.PROJECT) + client = _make_client(project=self.PROJECT) RESOURCE = self._makeResource() + load_config = RESOURCE['configuration']['load'] + load_config['createDisposition'] = 'CREATE_IF_NEEDED' klass = self._get_target_class() - dataset = klass.from_api_repr(RESOURCE, client=client) - self.assertIs(dataset._client, client) - self._verifyResourceProperties(dataset, RESOURCE) + job = klass.from_api_repr(RESOURCE, client=client) + self.assertIs(job._client, client) + self._verifyResourceProperties(job, RESOURCE) def test_begin_w_already_running(self): conn = _Connection() - client = _Client(project=self.PROJECT, connection=conn) - table = _Table() - job = self._make_one(self.JOB_NAME, table, [self.SOURCE1], client) + client = _make_client(project=self.PROJECT, connection=conn) + job = self._make_one(self.JOB_ID, [self.SOURCE1], self.TABLE_REF, + client) job._properties['status'] = {'state': 'RUNNING'} with self.assertRaises(ValueError): - job.begin() + job._begin() def test_begin_w_bound_client(self): PATH = '/projects/%s/jobs' % (self.PROJECT,) @@ -541,11 +564,11 @@ def test_begin_w_bound_client(self): del RESOURCE['selfLink'] del RESOURCE['user_email'] conn = _Connection(RESOURCE) - client = _Client(project=self.PROJECT, connection=conn) - table = _Table() - job = self._make_one(self.JOB_NAME, table, [self.SOURCE1], client) + client = _make_client(project=self.PROJECT, connection=conn) + job = self._make_one(self.JOB_ID, [self.SOURCE1], self.TABLE_REF, + client) - job.begin() + job._begin() self.assertEqual(len(conn._requested), 1) req = conn._requested[0] @@ -554,15 +577,15 @@ def test_begin_w_bound_client(self): SENT = { 'jobReference': { 'projectId': self.PROJECT, - 'jobId': self.JOB_NAME, + 'jobId': self.JOB_ID, }, 'configuration': { 'load': { 'sourceUris': [self.SOURCE1], 'destinationTable': { 'projectId': self.PROJECT, - 'datasetId': self.DS_NAME, - 'tableId': self.TABLE_NAME, + 'datasetId': self.DS_ID, + 'tableId': self.TABLE_ID, }, }, }, @@ -580,24 +603,25 @@ def 
test_begin_w_autodetect(self): del resource['selfLink'] del resource['user_email'] conn = _Connection(resource) - client = _Client(project=self.PROJECT, connection=conn) - table = _Table() - job = self._make_one(self.JOB_NAME, table, [self.SOURCE1], client) - job.autodetect = True - job.begin() + client = _make_client(project=self.PROJECT, connection=conn) + config = LoadJobConfig() + config.autodetect = True + job = self._make_one(self.JOB_ID, [self.SOURCE1], self.TABLE_REF, + client, config) + job._begin() sent = { 'jobReference': { 'projectId': self.PROJECT, - 'jobId': self.JOB_NAME, + 'jobId': self.JOB_ID, }, 'configuration': { 'load': { 'sourceUris': [self.SOURCE1], 'destinationTable': { 'projectId': self.PROJECT, - 'datasetId': self.DS_NAME, - 'tableId': self.TABLE_NAME, + 'datasetId': self.DS_ID, + 'tableId': self.TABLE_ID, }, 'autodetect': True }, @@ -620,8 +644,8 @@ def test_begin_w_alternate_client(self): 'sourceUris': [self.SOURCE1], 'destinationTable': { 'projectId': self.PROJECT, - 'datasetId': self.DS_NAME, - 'tableId': self.TABLE_NAME, + 'datasetId': self.DS_ID, + 'tableId': self.TABLE_ID, }, 'allowJaggedRows': True, 'allowQuotedNewlines': True, @@ -632,7 +656,7 @@ def test_begin_w_alternate_client(self): 'maxBadRecords': 100, 'nullMarker': r'\N', 'quote': "'", - 'skipLeadingRows': 1, + 'skipLeadingRows': '1', 'sourceFormat': 'CSV', 'writeDisposition': 'WRITE_TRUNCATE', 'schema': {'fields': [ @@ -642,29 +666,29 @@ def test_begin_w_alternate_client(self): } RESOURCE['configuration']['load'] = LOAD_CONFIGURATION conn1 = _Connection() - client1 = _Client(project=self.PROJECT, connection=conn1) + client1 = _make_client(project=self.PROJECT, connection=conn1) conn2 = _Connection(RESOURCE) - client2 = _Client(project=self.PROJECT, connection=conn2) - table = _Table() + client2 = _make_client(project=self.PROJECT, connection=conn2) full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') age = SchemaField('age', 'INTEGER', mode='REQUIRED') - job = self._make_one(self.JOB_NAME, table, [self.SOURCE1], client1, - schema=[full_name, age]) - - job.allow_jagged_rows = True - job.allow_quoted_newlines = True - job.create_disposition = 'CREATE_NEVER' - job.encoding = 'ISO-8559-1' - job.field_delimiter = '|' - job.ignore_unknown_values = True - job.max_bad_records = 100 - job.null_marker = r'\N' - job.quote_character = "'" - job.skip_leading_rows = 1 - job.source_format = 'CSV' - job.write_disposition = 'WRITE_TRUNCATE' - - job.begin(client=client2) + config = LoadJobConfig() + config.schema = [full_name, age] + job = self._make_one(self.JOB_ID, [self.SOURCE1], self.TABLE_REF, + client1, config) + config.allow_jagged_rows = True + config.allow_quoted_newlines = True + config.create_disposition = 'CREATE_NEVER' + config.encoding = 'ISO-8559-1' + config.field_delimiter = '|' + config.ignore_unknown_values = True + config.max_bad_records = 100 + config.null_marker = r'\N' + config.quote_character = "'" + config.skip_leading_rows = 1 + config.source_format = 'CSV' + config.write_disposition = 'WRITE_TRUNCATE' + + job._begin(client=client2) self.assertEqual(len(conn1._requested), 0) self.assertEqual(len(conn2._requested), 1) @@ -674,21 +698,22 @@ def test_begin_w_alternate_client(self): SENT = { 'jobReference': { 'projectId': self.PROJECT, - 'jobId': self.JOB_NAME, + 'jobId': self.JOB_ID, }, 'configuration': { 'load': LOAD_CONFIGURATION, }, } + self.maxDiff = None self.assertEqual(req['data'], SENT) self._verifyResourceProperties(job, RESOURCE) def test_exists_miss_w_bound_client(self): - 
PATH = '/projects/%s/jobs/%s' % (self.PROJECT, self.JOB_NAME) + PATH = '/projects/%s/jobs/%s' % (self.PROJECT, self.JOB_ID) conn = _Connection() - client = _Client(project=self.PROJECT, connection=conn) + client = _make_client(project=self.PROJECT, connection=conn) table = _Table() - job = self._make_one(self.JOB_NAME, table, [self.SOURCE1], client) + job = self._make_one(self.JOB_ID, [self.SOURCE1], table, client) self.assertFalse(job.exists()) @@ -699,13 +724,13 @@ def test_exists_miss_w_bound_client(self): self.assertEqual(req['query_params'], {'fields': 'id'}) def test_exists_hit_w_alternate_client(self): - PATH = '/projects/%s/jobs/%s' % (self.PROJECT, self.JOB_NAME) + PATH = '/projects/%s/jobs/%s' % (self.PROJECT, self.JOB_ID) conn1 = _Connection() - client1 = _Client(project=self.PROJECT, connection=conn1) + client1 = _make_client(project=self.PROJECT, connection=conn1) conn2 = _Connection({}) - client2 = _Client(project=self.PROJECT, connection=conn2) + client2 = _make_client(project=self.PROJECT, connection=conn2) table = _Table() - job = self._make_one(self.JOB_NAME, table, [self.SOURCE1], client1) + job = self._make_one(self.JOB_ID, [self.SOURCE1], table, client1) self.assertTrue(job.exists(client=client2)) @@ -717,12 +742,12 @@ def test_exists_hit_w_alternate_client(self): self.assertEqual(req['query_params'], {'fields': 'id'}) def test_reload_w_bound_client(self): - PATH = '/projects/%s/jobs/%s' % (self.PROJECT, self.JOB_NAME) + PATH = '/projects/%s/jobs/%s' % (self.PROJECT, self.JOB_ID) RESOURCE = self._makeResource() conn = _Connection(RESOURCE) - client = _Client(project=self.PROJECT, connection=conn) + client = _make_client(project=self.PROJECT, connection=conn) table = _Table() - job = self._make_one(self.JOB_NAME, table, [self.SOURCE1], client) + job = self._make_one(self.JOB_ID, [self.SOURCE1], table, client) job.reload() @@ -733,14 +758,14 @@ def test_reload_w_bound_client(self): self._verifyResourceProperties(job, RESOURCE) def test_reload_w_alternate_client(self): - PATH = '/projects/%s/jobs/%s' % (self.PROJECT, self.JOB_NAME) + PATH = '/projects/%s/jobs/%s' % (self.PROJECT, self.JOB_ID) RESOURCE = self._makeResource() conn1 = _Connection() - client1 = _Client(project=self.PROJECT, connection=conn1) + client1 = _make_client(project=self.PROJECT, connection=conn1) conn2 = _Connection(RESOURCE) - client2 = _Client(project=self.PROJECT, connection=conn2) + client2 = _make_client(project=self.PROJECT, connection=conn2) table = _Table() - job = self._make_one(self.JOB_NAME, table, [self.SOURCE1], client1) + job = self._make_one(self.JOB_ID, [self.SOURCE1], table, client1) job.reload(client=client2) @@ -752,13 +777,13 @@ def test_reload_w_alternate_client(self): self._verifyResourceProperties(job, RESOURCE) def test_cancel_w_bound_client(self): - PATH = '/projects/%s/jobs/%s/cancel' % (self.PROJECT, self.JOB_NAME) + PATH = '/projects/%s/jobs/%s/cancel' % (self.PROJECT, self.JOB_ID) RESOURCE = self._makeResource(ended=True) RESPONSE = {'job': RESOURCE} conn = _Connection(RESPONSE) - client = _Client(project=self.PROJECT, connection=conn) + client = _make_client(project=self.PROJECT, connection=conn) table = _Table() - job = self._make_one(self.JOB_NAME, table, [self.SOURCE1], client) + job = self._make_one(self.JOB_ID, [self.SOURCE1], table, client) job.cancel() @@ -769,15 +794,15 @@ def test_cancel_w_bound_client(self): self._verifyResourceProperties(job, RESOURCE) def test_cancel_w_alternate_client(self): - PATH = '/projects/%s/jobs/%s/cancel' % (self.PROJECT, 
self.JOB_NAME) + PATH = '/projects/%s/jobs/%s/cancel' % (self.PROJECT, self.JOB_ID) RESOURCE = self._makeResource(ended=True) RESPONSE = {'job': RESOURCE} conn1 = _Connection() - client1 = _Client(project=self.PROJECT, connection=conn1) + client1 = _make_client(project=self.PROJECT, connection=conn1) conn2 = _Connection(RESPONSE) - client2 = _Client(project=self.PROJECT, connection=conn2) + client2 = _make_client(project=self.PROJECT, connection=conn2) table = _Table() - job = self._make_one(self.JOB_NAME, table, [self.SOURCE1], client1) + job = self._make_one(self.JOB_ID, [self.SOURCE1], table, client1) job.cancel(client=client2) @@ -806,12 +831,12 @@ def _makeResource(self, started=False, ended=False): config = resource['configuration']['copy'] config['sourceTables'] = [{ 'projectId': self.PROJECT, - 'datasetId': self.DS_NAME, + 'datasetId': self.DS_ID, 'tableId': self.SOURCE_TABLE, }] config['destinationTable'] = { 'projectId': self.PROJECT, - 'datasetId': self.DS_NAME, + 'datasetId': self.DS_ID, 'tableId': self.DESTINATION_TABLE, } @@ -824,8 +849,8 @@ def _verifyResourceProperties(self, job, resource): table_ref = config['destinationTable'] self.assertEqual(job.destination.project, table_ref['projectId']) - self.assertEqual(job.destination.dataset_name, table_ref['datasetId']) - self.assertEqual(job.destination.name, table_ref['tableId']) + self.assertEqual(job.destination.dataset_id, table_ref['datasetId']) + self.assertEqual(job.destination.table_id, table_ref['tableId']) sources = config.get('sourceTables') if sources is None: @@ -833,8 +858,8 @@ def _verifyResourceProperties(self, job, resource): self.assertEqual(len(sources), len(job.sources)) for table_ref, table in zip(sources, job.sources): self.assertEqual(table.project, table_ref['projectId']) - self.assertEqual(table.dataset_name, table_ref['datasetId']) - self.assertEqual(table.name, table_ref['tableId']) + self.assertEqual(table.dataset_id, table_ref['datasetId']) + self.assertEqual(table.table_id, table_ref['tableId']) if 'createDisposition' in config: self.assertEqual(job.create_disposition, @@ -849,17 +874,17 @@ def _verifyResourceProperties(self, job, resource): self.assertIsNone(job.write_disposition) def test_ctor(self): - client = _Client(self.PROJECT) - source = _Table(self.SOURCE_TABLE) - destination = _Table(self.DESTINATION_TABLE) - job = self._make_one(self.JOB_NAME, destination, [source], client) + client = _make_client(project=self.PROJECT) + source = self._table_ref(self.SOURCE_TABLE) + destination = self._table_ref(self.DESTINATION_TABLE) + job = self._make_one(self.JOB_ID, [source], destination, client) self.assertIs(job.destination, destination) self.assertEqual(job.sources, [source]) self.assertIs(job._client, client) self.assertEqual(job.job_type, self.JOB_TYPE) self.assertEqual( job.path, - '/projects/%s/jobs/%s' % (self.PROJECT, self.JOB_NAME)) + '/projects/%s/jobs/%s' % (self.PROJECT, self.JOB_ID)) self._verifyInitialReadonlyProperties(job) @@ -869,7 +894,7 @@ def test_ctor(self): def test_from_api_repr_missing_identity(self): self._setUpConstants() - client = _Client(self.PROJECT) + client = _make_client(project=self.PROJECT) RESOURCE = {} klass = self._get_target_class() with self.assertRaises(KeyError): @@ -877,12 +902,12 @@ def test_from_api_repr_missing_identity(self): def test_from_api_repr_missing_config(self): self._setUpConstants() - client = _Client(self.PROJECT) + client = _make_client(project=self.PROJECT) RESOURCE = { - 'id': '%s:%s' % (self.PROJECT, self.DS_NAME), + 'id': '%s:%s' % 
(self.PROJECT, self.DS_ID), 'jobReference': { 'projectId': self.PROJECT, - 'jobId': self.JOB_NAME, + 'jobId': self.JOB_ID, } } klass = self._get_target_class() @@ -891,23 +916,23 @@ def test_from_api_repr_missing_config(self): def test_from_api_repr_bare(self): self._setUpConstants() - client = _Client(self.PROJECT) + client = _make_client(project=self.PROJECT) RESOURCE = { 'id': self.JOB_ID, 'jobReference': { 'projectId': self.PROJECT, - 'jobId': self.JOB_NAME, + 'jobId': self.JOB_ID, }, 'configuration': { 'copy': { 'sourceTables': [{ 'projectId': self.PROJECT, - 'datasetId': self.DS_NAME, + 'datasetId': self.DS_ID, 'tableId': self.SOURCE_TABLE, }], 'destinationTable': { 'projectId': self.PROJECT, - 'datasetId': self.DS_NAME, + 'datasetId': self.DS_ID, 'tableId': self.DESTINATION_TABLE, }, } @@ -920,23 +945,23 @@ def test_from_api_repr_bare(self): def test_from_api_repr_w_sourcetable(self): self._setUpConstants() - client = _Client(self.PROJECT) + client = _make_client(project=self.PROJECT) RESOURCE = { 'id': self.JOB_ID, 'jobReference': { 'projectId': self.PROJECT, - 'jobId': self.JOB_NAME, + 'jobId': self.JOB_ID, }, 'configuration': { 'copy': { 'sourceTable': { 'projectId': self.PROJECT, - 'datasetId': self.DS_NAME, + 'datasetId': self.DS_ID, 'tableId': self.SOURCE_TABLE, }, 'destinationTable': { 'projectId': self.PROJECT, - 'datasetId': self.DS_NAME, + 'datasetId': self.DS_ID, 'tableId': self.DESTINATION_TABLE, }, } @@ -949,18 +974,18 @@ def test_from_api_repr_w_sourcetable(self): def test_from_api_repr_wo_sources(self): self._setUpConstants() - client = _Client(self.PROJECT) + client = _make_client(project=self.PROJECT) RESOURCE = { 'id': self.JOB_ID, 'jobReference': { 'projectId': self.PROJECT, - 'jobId': self.JOB_NAME, + 'jobId': self.JOB_ID, }, 'configuration': { 'copy': { 'destinationTable': { 'projectId': self.PROJECT, - 'datasetId': self.DS_NAME, + 'datasetId': self.DS_ID, 'tableId': self.DESTINATION_TABLE, }, } @@ -971,12 +996,14 @@ def test_from_api_repr_wo_sources(self): klass.from_api_repr(RESOURCE, client=client) def test_from_api_repr_w_properties(self): - client = _Client(self.PROJECT) + client = _make_client(project=self.PROJECT) RESOURCE = self._makeResource() + copy_config = RESOURCE['configuration']['copy'] + copy_config['createDisposition'] = 'CREATE_IF_NEEDED' klass = self._get_target_class() - dataset = klass.from_api_repr(RESOURCE, client=client) - self.assertIs(dataset._client, client) - self._verifyResourceProperties(dataset, RESOURCE) + job = klass.from_api_repr(RESOURCE, client=client) + self.assertIs(job._client, client) + self._verifyResourceProperties(job, RESOURCE) def test_begin_w_bound_client(self): PATH = '/projects/%s/jobs' % (self.PROJECT,) @@ -987,12 +1014,12 @@ def test_begin_w_bound_client(self): del RESOURCE['selfLink'] del RESOURCE['user_email'] conn = _Connection(RESOURCE) - client = _Client(project=self.PROJECT, connection=conn) - source = _Table(self.SOURCE_TABLE) - destination = _Table(self.DESTINATION_TABLE) - job = self._make_one(self.JOB_NAME, destination, [source], client) + client = _make_client(project=self.PROJECT, connection=conn) + source = self._table_ref(self.SOURCE_TABLE) + destination = self._table_ref(self.DESTINATION_TABLE) + job = self._make_one(self.JOB_ID, [source], destination, client) - job.begin() + job._begin() self.assertEqual(len(conn._requested), 1) req = conn._requested[0] @@ -1001,18 +1028,18 @@ def test_begin_w_bound_client(self): SENT = { 'jobReference': { 'projectId': self.PROJECT, - 'jobId': self.JOB_NAME, + 
'jobId': self.JOB_ID, }, 'configuration': { 'copy': { 'sourceTables': [{ 'projectId': self.PROJECT, - 'datasetId': self.DS_NAME, + 'datasetId': self.DS_ID, 'tableId': self.SOURCE_TABLE }], 'destinationTable': { 'projectId': self.PROJECT, - 'datasetId': self.DS_NAME, + 'datasetId': self.DS_ID, 'tableId': self.DESTINATION_TABLE, }, }, @@ -1027,12 +1054,12 @@ def test_begin_w_alternate_client(self): COPY_CONFIGURATION = { 'sourceTables': [{ 'projectId': self.PROJECT, - 'datasetId': self.DS_NAME, + 'datasetId': self.DS_ID, 'tableId': self.SOURCE_TABLE, }], 'destinationTable': { 'projectId': self.PROJECT, - 'datasetId': self.DS_NAME, + 'datasetId': self.DS_ID, 'tableId': self.DESTINATION_TABLE, }, 'createDisposition': 'CREATE_NEVER', @@ -1040,17 +1067,17 @@ def test_begin_w_alternate_client(self): } RESOURCE['configuration']['copy'] = COPY_CONFIGURATION conn1 = _Connection() - client1 = _Client(project=self.PROJECT, connection=conn1) + client1 = _make_client(project=self.PROJECT, connection=conn1) conn2 = _Connection(RESOURCE) - client2 = _Client(project=self.PROJECT, connection=conn2) - source = _Table(self.SOURCE_TABLE) - destination = _Table(self.DESTINATION_TABLE) - job = self._make_one(self.JOB_NAME, destination, [source], client1) - - job.create_disposition = 'CREATE_NEVER' - job.write_disposition = 'WRITE_TRUNCATE' - - job.begin(client=client2) + client2 = _make_client(project=self.PROJECT, connection=conn2) + source = self._table_ref(self.SOURCE_TABLE) + destination = self._table_ref(self.DESTINATION_TABLE) + config = CopyJobConfig() + config.create_disposition = 'CREATE_NEVER' + config.write_disposition = 'WRITE_TRUNCATE' + job = self._make_one(self.JOB_ID, [source], destination, client1, + config) + job._begin(client=client2) self.assertEqual(len(conn1._requested), 0) self.assertEqual(len(conn2._requested), 1) @@ -1060,7 +1087,7 @@ def test_begin_w_alternate_client(self): SENT = { 'jobReference': { 'projectId': self.PROJECT, - 'jobId': self.JOB_NAME, + 'jobId': self.JOB_ID, }, 'configuration': { 'copy': COPY_CONFIGURATION, @@ -1070,12 +1097,13 @@ def test_begin_w_alternate_client(self): self._verifyResourceProperties(job, RESOURCE) def test_exists_miss_w_bound_client(self): - PATH = '/projects/%s/jobs/%s' % (self.PROJECT, self.JOB_NAME) + PATH = '/projects/%s/jobs/%s' % (self.PROJECT, self.JOB_ID) conn = _Connection() - client = _Client(project=self.PROJECT, connection=conn) - source = _Table(self.SOURCE_TABLE) - destination = _Table(self.DESTINATION_TABLE) - job = self._make_one(self.JOB_NAME, destination, [source], client) + client = _make_client(project=self.PROJECT, connection=conn) + + source = self._table_ref(self.SOURCE_TABLE) + destination = self._table_ref(self.DESTINATION_TABLE) + job = self._make_one(self.JOB_ID, [source], destination, client) self.assertFalse(job.exists()) @@ -1086,14 +1114,14 @@ def test_exists_miss_w_bound_client(self): self.assertEqual(req['query_params'], {'fields': 'id'}) def test_exists_hit_w_alternate_client(self): - PATH = '/projects/%s/jobs/%s' % (self.PROJECT, self.JOB_NAME) + PATH = '/projects/%s/jobs/%s' % (self.PROJECT, self.JOB_ID) conn1 = _Connection() - client1 = _Client(project=self.PROJECT, connection=conn1) + client1 = _make_client(project=self.PROJECT, connection=conn1) conn2 = _Connection({}) - client2 = _Client(project=self.PROJECT, connection=conn2) - source = _Table(self.SOURCE_TABLE) - destination = _Table(self.DESTINATION_TABLE) - job = self._make_one(self.JOB_NAME, destination, [source], client1) + client2 = 
_make_client(project=self.PROJECT, connection=conn2) + source = self._table_ref(self.SOURCE_TABLE) + destination = self._table_ref(self.DESTINATION_TABLE) + job = self._make_one(self.JOB_ID, [source], destination, client1) self.assertTrue(job.exists(client=client2)) @@ -1105,13 +1133,13 @@ def test_exists_hit_w_alternate_client(self): self.assertEqual(req['query_params'], {'fields': 'id'}) def test_reload_w_bound_client(self): - PATH = '/projects/%s/jobs/%s' % (self.PROJECT, self.JOB_NAME) + PATH = '/projects/%s/jobs/%s' % (self.PROJECT, self.JOB_ID) RESOURCE = self._makeResource() conn = _Connection(RESOURCE) - client = _Client(project=self.PROJECT, connection=conn) - source = _Table(self.SOURCE_TABLE) - destination = _Table(self.DESTINATION_TABLE) - job = self._make_one(self.JOB_NAME, destination, [source], client) + client = _make_client(project=self.PROJECT, connection=conn) + source = self._table_ref(self.SOURCE_TABLE) + destination = self._table_ref(self.DESTINATION_TABLE) + job = self._make_one(self.JOB_ID, [source], destination, client) job.reload() @@ -1122,15 +1150,15 @@ def test_reload_w_bound_client(self): self._verifyResourceProperties(job, RESOURCE) def test_reload_w_alternate_client(self): - PATH = '/projects/%s/jobs/%s' % (self.PROJECT, self.JOB_NAME) + PATH = '/projects/%s/jobs/%s' % (self.PROJECT, self.JOB_ID) RESOURCE = self._makeResource() conn1 = _Connection() - client1 = _Client(project=self.PROJECT, connection=conn1) + client1 = _make_client(project=self.PROJECT, connection=conn1) conn2 = _Connection(RESOURCE) - client2 = _Client(project=self.PROJECT, connection=conn2) - source = _Table(self.SOURCE_TABLE) - destination = _Table(self.DESTINATION_TABLE) - job = self._make_one(self.JOB_NAME, destination, [source], client1) + client2 = _make_client(project=self.PROJECT, connection=conn2) + source = self._table_ref(self.SOURCE_TABLE) + destination = self._table_ref(self.DESTINATION_TABLE) + job = self._make_one(self.JOB_ID, [source], destination, client1) job.reload(client=client2) @@ -1142,24 +1170,24 @@ def test_reload_w_alternate_client(self): self._verifyResourceProperties(job, RESOURCE) -class TestExtractTableToStorageJob(unittest.TestCase, _Base): +class TestExtractJob(unittest.TestCase, _Base): JOB_TYPE = 'extract' SOURCE_TABLE = 'source_table' DESTINATION_URI = 'gs://bucket_name/object_name' @staticmethod def _get_target_class(): - from google.cloud.bigquery.job import ExtractTableToStorageJob + from google.cloud.bigquery.job import ExtractJob - return ExtractTableToStorageJob + return ExtractJob def _makeResource(self, started=False, ended=False): - resource = super(TestExtractTableToStorageJob, self)._makeResource( + resource = super(TestExtractJob, self)._makeResource( started, ended) config = resource['configuration']['extract'] config['sourceTable'] = { 'projectId': self.PROJECT, - 'datasetId': self.DS_NAME, + 'datasetId': self.DS_ID, 'tableId': self.SOURCE_TABLE, } config['destinationUris'] = [self.DESTINATION_URI] @@ -1174,37 +1202,37 @@ def _verifyResourceProperties(self, job, resource): table_ref = config['sourceTable'] self.assertEqual(job.source.project, table_ref['projectId']) - self.assertEqual(job.source.dataset_name, table_ref['datasetId']) - self.assertEqual(job.source.name, table_ref['tableId']) + self.assertEqual(job.source.dataset_id, table_ref['datasetId']) + self.assertEqual(job.source.table_id, table_ref['tableId']) if 'compression' in config: - self.assertEqual(job.compression, - config['compression']) + self.assertEqual( + job.compression, 
config['compression']) else: self.assertIsNone(job.compression) if 'destinationFormat' in config: - self.assertEqual(job.destination_format, - config['destinationFormat']) + self.assertEqual( + job.destination_format, config['destinationFormat']) else: self.assertIsNone(job.destination_format) if 'fieldDelimiter' in config: - self.assertEqual(job.field_delimiter, - config['fieldDelimiter']) + self.assertEqual( + job.field_delimiter, config['fieldDelimiter']) else: self.assertIsNone(job.field_delimiter) if 'printHeader' in config: - self.assertEqual(job.print_header, - config['printHeader']) + self.assertEqual( + job.print_header, config['printHeader']) else: self.assertIsNone(job.print_header) def test_ctor(self): - client = _Client(self.PROJECT) + client = _make_client(project=self.PROJECT) source = _Table(self.SOURCE_TABLE) - job = self._make_one(self.JOB_NAME, source, [self.DESTINATION_URI], + job = self._make_one(self.JOB_ID, source, [self.DESTINATION_URI], client) self.assertEqual(job.source, source) self.assertEqual(job.destination_uris, [self.DESTINATION_URI]) @@ -1212,19 +1240,36 @@ def test_ctor(self): self.assertEqual(job.job_type, self.JOB_TYPE) self.assertEqual( job.path, - '/projects/%s/jobs/%s' % (self.PROJECT, self.JOB_NAME)) + '/projects/%s/jobs/%s' % (self.PROJECT, self.JOB_ID)) self._verifyInitialReadonlyProperties(job) - # set/read from resource['configuration']['copy'] + # set/read from resource['configuration']['extract'] self.assertIsNone(job.compression) self.assertIsNone(job.destination_format) self.assertIsNone(job.field_delimiter) self.assertIsNone(job.print_header) + def test_destination_uri_file_counts(self): + file_counts = 23 + client = _make_client(project=self.PROJECT) + source = _Table(self.SOURCE_TABLE) + job = self._make_one(self.JOB_ID, source, [self.DESTINATION_URI], + client) + self.assertIsNone(job.destination_uri_file_counts) + + statistics = job._properties['statistics'] = {} + self.assertIsNone(job.destination_uri_file_counts) + + extract_stats = statistics['extract'] = {} + self.assertIsNone(job.destination_uri_file_counts) + + extract_stats['destinationUriFileCounts'] = str(file_counts) + self.assertEqual(job.destination_uri_file_counts, file_counts) + def test_from_api_repr_missing_identity(self): self._setUpConstants() - client = _Client(self.PROJECT) + client = _make_client(project=self.PROJECT) RESOURCE = {} klass = self._get_target_class() with self.assertRaises(KeyError): @@ -1232,12 +1277,12 @@ def test_from_api_repr_missing_identity(self): def test_from_api_repr_missing_config(self): self._setUpConstants() - client = _Client(self.PROJECT) + client = _make_client(project=self.PROJECT) RESOURCE = { - 'id': '%s:%s' % (self.PROJECT, self.DS_NAME), + 'id': '%s:%s' % (self.PROJECT, self.DS_ID), 'jobReference': { 'projectId': self.PROJECT, - 'jobId': self.JOB_NAME, + 'jobId': self.JOB_ID, } } klass = self._get_target_class() @@ -1246,18 +1291,18 @@ def test_from_api_repr_missing_config(self): def test_from_api_repr_bare(self): self._setUpConstants() - client = _Client(self.PROJECT) + client = _make_client(project=self.PROJECT) RESOURCE = { 'id': self.JOB_ID, 'jobReference': { 'projectId': self.PROJECT, - 'jobId': self.JOB_NAME, + 'jobId': self.JOB_ID, }, 'configuration': { 'extract': { 'sourceTable': { 'projectId': self.PROJECT, - 'datasetId': self.DS_NAME, + 'datasetId': self.DS_ID, 'tableId': self.SOURCE_TABLE, }, 'destinationUris': [self.DESTINATION_URI], @@ -1270,12 +1315,14 @@ def test_from_api_repr_bare(self): 
self._verifyResourceProperties(job, RESOURCE) def test_from_api_repr_w_properties(self): - client = _Client(self.PROJECT) + client = _make_client(project=self.PROJECT) RESOURCE = self._makeResource() + extract_config = RESOURCE['configuration']['extract'] + extract_config['compression'] = 'GZIP' klass = self._get_target_class() - dataset = klass.from_api_repr(RESOURCE, client=client) - self.assertIs(dataset._client, client) - self._verifyResourceProperties(dataset, RESOURCE) + job = klass.from_api_repr(RESOURCE, client=client) + self.assertIs(job._client, client) + self._verifyResourceProperties(job, RESOURCE) def test_begin_w_bound_client(self): PATH = '/projects/%s/jobs' % (self.PROJECT,) @@ -1286,12 +1333,13 @@ def test_begin_w_bound_client(self): del RESOURCE['selfLink'] del RESOURCE['user_email'] conn = _Connection(RESOURCE) - client = _Client(project=self.PROJECT, connection=conn) - source = _Table(self.SOURCE_TABLE) - job = self._make_one(self.JOB_NAME, source, [self.DESTINATION_URI], + client = _make_client(project=self.PROJECT, connection=conn) + source_dataset = DatasetReference(self.PROJECT, self.DS_ID) + source = source_dataset.table(self.SOURCE_TABLE) + job = self._make_one(self.JOB_ID, source, [self.DESTINATION_URI], client) - job.begin() + job._begin() self.assertEqual(len(conn._requested), 1) req = conn._requested[0] @@ -1300,13 +1348,13 @@ def test_begin_w_bound_client(self): SENT = { 'jobReference': { 'projectId': self.PROJECT, - 'jobId': self.JOB_NAME, + 'jobId': self.JOB_ID, }, 'configuration': { 'extract': { 'sourceTable': { 'projectId': self.PROJECT, - 'datasetId': self.DS_NAME, + 'datasetId': self.DS_ID, 'tableId': self.SOURCE_TABLE }, 'destinationUris': [self.DESTINATION_URI], @@ -1322,7 +1370,7 @@ def test_begin_w_alternate_client(self): EXTRACT_CONFIGURATION = { 'sourceTable': { 'projectId': self.PROJECT, - 'datasetId': self.DS_NAME, + 'datasetId': self.DS_ID, 'tableId': self.SOURCE_TABLE, }, 'destinationUris': [self.DESTINATION_URI], @@ -1333,19 +1381,20 @@ def test_begin_w_alternate_client(self): } RESOURCE['configuration']['extract'] = EXTRACT_CONFIGURATION conn1 = _Connection() - client1 = _Client(project=self.PROJECT, connection=conn1) + client1 = _make_client(project=self.PROJECT, connection=conn1) conn2 = _Connection(RESOURCE) - client2 = _Client(project=self.PROJECT, connection=conn2) - source = _Table(self.SOURCE_TABLE) - job = self._make_one(self.JOB_NAME, source, [self.DESTINATION_URI], - client1) - - job.compression = 'GZIP' - job.destination_format = 'NEWLINE_DELIMITED_JSON' - job.field_delimiter = '|' - job.print_header = False - - job.begin(client=client2) + client2 = _make_client(project=self.PROJECT, connection=conn2) + source_dataset = DatasetReference(self.PROJECT, self.DS_ID) + source = source_dataset.table(self.SOURCE_TABLE) + job_config = ExtractJobConfig() + job_config.compression = 'GZIP' + job_config.destination_format = 'NEWLINE_DELIMITED_JSON' + job_config.field_delimiter = '|' + job_config.print_header = False + job = self._make_one(self.JOB_ID, source, [self.DESTINATION_URI], + client1, job_config) + + job._begin(client=client2) self.assertEqual(len(conn1._requested), 0) self.assertEqual(len(conn2._requested), 1) @@ -1355,7 +1404,7 @@ def test_begin_w_alternate_client(self): SENT = { 'jobReference': { 'projectId': self.PROJECT, - 'jobId': self.JOB_NAME, + 'jobId': self.JOB_ID, }, 'configuration': { 'extract': EXTRACT_CONFIGURATION, @@ -1365,11 +1414,11 @@ def test_begin_w_alternate_client(self): self._verifyResourceProperties(job, 
RESOURCE) def test_exists_miss_w_bound_client(self): - PATH = '/projects/%s/jobs/%s' % (self.PROJECT, self.JOB_NAME) + PATH = '/projects/%s/jobs/%s' % (self.PROJECT, self.JOB_ID) conn = _Connection() - client = _Client(project=self.PROJECT, connection=conn) + client = _make_client(project=self.PROJECT, connection=conn) source = _Table(self.SOURCE_TABLE) - job = self._make_one(self.JOB_NAME, source, [self.DESTINATION_URI], + job = self._make_one(self.JOB_ID, source, [self.DESTINATION_URI], client) self.assertFalse(job.exists()) @@ -1381,13 +1430,13 @@ def test_exists_miss_w_bound_client(self): self.assertEqual(req['query_params'], {'fields': 'id'}) def test_exists_hit_w_alternate_client(self): - PATH = '/projects/%s/jobs/%s' % (self.PROJECT, self.JOB_NAME) + PATH = '/projects/%s/jobs/%s' % (self.PROJECT, self.JOB_ID) conn1 = _Connection() - client1 = _Client(project=self.PROJECT, connection=conn1) + client1 = _make_client(project=self.PROJECT, connection=conn1) conn2 = _Connection({}) - client2 = _Client(project=self.PROJECT, connection=conn2) + client2 = _make_client(project=self.PROJECT, connection=conn2) source = _Table(self.SOURCE_TABLE) - job = self._make_one(self.JOB_NAME, source, [self.DESTINATION_URI], + job = self._make_one(self.JOB_ID, source, [self.DESTINATION_URI], client1) self.assertTrue(job.exists(client=client2)) @@ -1400,12 +1449,13 @@ def test_exists_hit_w_alternate_client(self): self.assertEqual(req['query_params'], {'fields': 'id'}) def test_reload_w_bound_client(self): - PATH = '/projects/%s/jobs/%s' % (self.PROJECT, self.JOB_NAME) + PATH = '/projects/%s/jobs/%s' % (self.PROJECT, self.JOB_ID) RESOURCE = self._makeResource() conn = _Connection(RESOURCE) - client = _Client(project=self.PROJECT, connection=conn) - source = _Table(self.SOURCE_TABLE) - job = self._make_one(self.JOB_NAME, source, [self.DESTINATION_URI], + client = _make_client(project=self.PROJECT, connection=conn) + source_dataset = DatasetReference(self.PROJECT, self.DS_ID) + source = source_dataset.table(self.SOURCE_TABLE) + job = self._make_one(self.JOB_ID, source, [self.DESTINATION_URI], client) job.reload() @@ -1417,14 +1467,15 @@ def test_reload_w_bound_client(self): self._verifyResourceProperties(job, RESOURCE) def test_reload_w_alternate_client(self): - PATH = '/projects/%s/jobs/%s' % (self.PROJECT, self.JOB_NAME) + PATH = '/projects/%s/jobs/%s' % (self.PROJECT, self.JOB_ID) RESOURCE = self._makeResource() conn1 = _Connection() - client1 = _Client(project=self.PROJECT, connection=conn1) + client1 = _make_client(project=self.PROJECT, connection=conn1) conn2 = _Connection(RESOURCE) - client2 = _Client(project=self.PROJECT, connection=conn2) - source = _Table(self.SOURCE_TABLE) - job = self._make_one(self.JOB_NAME, source, [self.DESTINATION_URI], + client2 = _make_client(project=self.PROJECT, connection=conn2) + source_dataset = DatasetReference(self.PROJECT, self.DS_ID) + source = source_dataset.table(self.SOURCE_TABLE) + job = self._make_one(self.JOB_ID, source, [self.DESTINATION_URI], client1) job.reload(client=client2) @@ -1437,6 +1488,69 @@ def test_reload_w_alternate_client(self): self._verifyResourceProperties(job, RESOURCE) +class TestQueryJobConfig(unittest.TestCase, _Base): + @staticmethod + def _get_target_class(): + from google.cloud.bigquery.job import QueryJobConfig + + return QueryJobConfig + + def _make_one(self, *args, **kw): + return self._get_target_class()(*args, **kw) + + def test_ctor(self): + config = self._make_one() + self.assertEqual(config._properties, {}) + + def 
test_from_api_repr_empty(self): + klass = self._get_target_class() + config = klass.from_api_repr({}) + self.assertIsNone(config.dry_run) + self.assertIsNone(config.use_legacy_sql) + self.assertIsNone(config.default_dataset) + + def test_from_api_repr_normal(self): + resource = { + 'useLegacySql': True, + 'query': 'no property for me', + 'defaultDataset': { + 'projectId': 'someproject', + 'datasetId': 'somedataset', + }, + 'someNewProperty': 'I should be saved, too.', + } + klass = self._get_target_class() + + config = klass.from_api_repr(resource) + + self.assertTrue(config.use_legacy_sql) + self.assertEqual( + config.default_dataset, + DatasetReference('someproject', 'somedataset')) + # Make sure unknown properties propagate. + self.assertEqual(config._properties['query'], 'no property for me') + self.assertEqual( + config._properties['someNewProperty'], 'I should be saved, too.') + + def test_to_api_repr_normal(self): + config = self._make_one() + config.use_legacy_sql = True + config.default_dataset = DatasetReference( + 'someproject', 'somedataset') + config._properties['someNewProperty'] = 'Woohoo, alpha stuff.' + + resource = config.to_api_repr() + + self.assertTrue(resource['useLegacySql']) + self.assertEqual( + resource['defaultDataset']['projectId'], 'someproject') + self.assertEqual( + resource['defaultDataset']['datasetId'], 'somedataset') + # Make sure unknown properties propagate. + self.assertEqual( + config._properties['someNewProperty'], 'Woohoo, alpha stuff.') + + class TestQueryJob(unittest.TestCase, _Base): JOB_TYPE = 'query' QUERY = 'select count(*) from persons' @@ -1484,13 +1598,14 @@ def _verifyBooleanResourceProperties(self, job, config): def _verifyIntegerResourceProperties(self, job, config): if 'maximumBillingTier' in config: - self.assertEqual(job.maximum_billing_tier, - config['maximumBillingTier']) + self.assertEqual( + job.maximum_billing_tier, config['maximumBillingTier']) else: self.assertIsNone(job.maximum_billing_tier) if 'maximumBytesBilled' in config: - self.assertEqual(job.maximum_bytes_billed, - config['maximumBytesBilled']) + self.assertEqual( + str(job.maximum_bytes_billed), config['maximumBytesBilled']) + self.assertIsInstance(job.maximum_bytes_billed, int) else: self.assertIsNone(job.maximum_bytes_billed) @@ -1511,6 +1626,17 @@ def _verifyQueryParameters(self, job, config): for found, expected in zip(job.query_parameters, query_parameters): self.assertEqual(found.to_api_repr(), expected) + def _verify_table_definitions(self, job, config): + table_defs = config.get('tableDefinitions') + if job.table_definitions is None: + self.assertIsNone(table_defs) + else: + self.assertEqual(len(job.table_definitions), len(table_defs)) + for found_key, found_ec in job.table_definitions.items(): + expected_ec = table_defs.get(found_key) + self.assertIsNotNone(expected_ec) + self.assertEqual(found_ec.to_api_repr(), expected_ec) + def _verify_configuration_properties(self, job, configuration): if 'dryRun' in configuration: self.assertEqual(job.dry_run, @@ -1529,6 +1655,7 @@ def _verifyResourceProperties(self, job, resource): self._verifyIntegerResourceProperties(job, query_config) self._verify_udf_resources(job, query_config) self._verifyQueryParameters(job, query_config) + self._verify_table_definitions(job, query_config) self.assertEqual(job.query, query_config['query']) if 'createDisposition' in query_config: @@ -1537,10 +1664,10 @@ def _verifyResourceProperties(self, job, resource): else: self.assertIsNone(job.create_disposition) if 'defaultDataset' in 
query_config: - dataset = job.default_dataset + ds_ref = job.default_dataset ds_ref = { - 'projectId': dataset.project, - 'datasetId': dataset.name, + 'projectId': ds_ref.project, + 'datasetId': ds_ref.dataset_id, } self.assertEqual(ds_ref, query_config['defaultDataset']) else: @@ -1549,8 +1676,8 @@ def _verifyResourceProperties(self, job, resource): table = job.destination tb_ref = { 'projectId': table.project, - 'datasetId': table.dataset_name, - 'tableId': table.name + 'datasetId': table.dataset_id, + 'tableId': table.table_id } self.assertEqual(tb_ref, query_config['destinationTable']) else: @@ -1567,18 +1694,20 @@ def _verifyResourceProperties(self, job, resource): self.assertIsNone(job.write_disposition) def test_ctor_defaults(self): - client = _Client(self.PROJECT) - job = self._make_one(self.JOB_NAME, self.QUERY, client) + client = _make_client(project=self.PROJECT) + job = self._make_one(self.JOB_ID, self.QUERY, client) self.assertEqual(job.query, self.QUERY) self.assertIs(job._client, client) self.assertEqual(job.job_type, self.JOB_TYPE) self.assertEqual( job.path, - '/projects/%s/jobs/%s' % (self.PROJECT, self.JOB_NAME)) + '/projects/%s/jobs/%s' % (self.PROJECT, self.JOB_ID)) self._verifyInitialReadonlyProperties(job) - # set/read from resource['configuration']['copy'] + self.assertFalse(job.use_legacy_sql) + + # set/read from resource['configuration']['query'] self.assertIsNone(job.allow_large_results) self.assertIsNone(job.create_disposition) self.assertIsNone(job.default_dataset) @@ -1586,34 +1715,40 @@ def test_ctor_defaults(self): self.assertIsNone(job.flatten_results) self.assertIsNone(job.priority) self.assertIsNone(job.use_query_cache) - self.assertIsNone(job.use_legacy_sql) self.assertIsNone(job.dry_run) self.assertIsNone(job.write_disposition) self.assertIsNone(job.maximum_billing_tier) self.assertIsNone(job.maximum_bytes_billed) + self.assertIsNone(job.table_definitions) def test_ctor_w_udf_resources(self): - from google.cloud.bigquery._helpers import UDFResource + from google.cloud.bigquery.job import QueryJobConfig + from google.cloud.bigquery.query import UDFResource RESOURCE_URI = 'gs://some-bucket/js/lib.js' udf_resources = [UDFResource("resourceUri", RESOURCE_URI)] - client = _Client(self.PROJECT) - job = self._make_one(self.JOB_NAME, self.QUERY, client, - udf_resources=udf_resources) + client = _make_client(project=self.PROJECT) + config = QueryJobConfig() + config.udf_resources = udf_resources + job = self._make_one( + self.JOB_ID, self.QUERY, client, job_config=config) self.assertEqual(job.udf_resources, udf_resources) def test_ctor_w_query_parameters(self): - from google.cloud.bigquery._helpers import ScalarQueryParameter + from google.cloud.bigquery.job import QueryJobConfig + from google.cloud.bigquery.query import ScalarQueryParameter query_parameters = [ScalarQueryParameter("foo", 'INT64', 123)] - client = _Client(self.PROJECT) - job = self._make_one(self.JOB_NAME, self.QUERY, client, - query_parameters=query_parameters) + client = _make_client(project=self.PROJECT) + config = QueryJobConfig() + config.query_parameters = query_parameters + job = self._make_one( + self.JOB_ID, self.QUERY, client, job_config=config) self.assertEqual(job.query_parameters, query_parameters) def test_from_api_repr_missing_identity(self): self._setUpConstants() - client = _Client(self.PROJECT) + client = _make_client(project=self.PROJECT) RESOURCE = {} klass = self._get_target_class() with self.assertRaises(KeyError): @@ -1621,12 +1756,12 @@ def 
test_from_api_repr_missing_identity(self): def test_from_api_repr_missing_config(self): self._setUpConstants() - client = _Client(self.PROJECT) + client = _make_client(project=self.PROJECT) RESOURCE = { - 'id': '%s:%s' % (self.PROJECT, self.DS_NAME), + 'id': '%s:%s' % (self.PROJECT, self.DS_ID), 'jobReference': { 'projectId': self.PROJECT, - 'jobId': self.JOB_NAME, + 'jobId': self.JOB_ID, } } klass = self._get_target_class() @@ -1635,15 +1770,15 @@ def test_from_api_repr_missing_config(self): def test_from_api_repr_bare(self): self._setUpConstants() - client = _Client(self.PROJECT) + client = _make_client(project=self.PROJECT) RESOURCE = { 'id': self.JOB_ID, 'jobReference': { 'projectId': self.PROJECT, - 'jobId': self.JOB_NAME, + 'jobId': self.JOB_ID, }, 'configuration': { - 'query': {'query': self.QUERY} + 'query': {'query': self.QUERY}, }, } klass = self._get_target_class() @@ -1652,21 +1787,24 @@ def test_from_api_repr_bare(self): self._verifyResourceProperties(job, RESOURCE) def test_from_api_repr_w_properties(self): - client = _Client(self.PROJECT) + client = _make_client(project=self.PROJECT) RESOURCE = self._makeResource() - RESOURCE['configuration']['query']['destinationTable'] = { + query_config = RESOURCE['configuration']['query'] + query_config['createDisposition'] = 'CREATE_IF_NEEDED' + query_config['writeDisposition'] = 'WRITE_TRUNCATE' + query_config['destinationTable'] = { 'projectId': self.PROJECT, - 'datasetId': self.DS_NAME, + 'datasetId': self.DS_ID, 'tableId': self.DESTINATION_TABLE, } klass = self._get_target_class() - dataset = klass.from_api_repr(RESOURCE, client=client) - self.assertIs(dataset._client, client) - self._verifyResourceProperties(dataset, RESOURCE) + job = klass.from_api_repr(RESOURCE, client=client) + self.assertIs(job._client, client) + self._verifyResourceProperties(job, RESOURCE) def test_cancelled(self): - client = _Client(self.PROJECT) - job = self._make_one(self.JOB_NAME, self.QUERY, client) + client = _make_client(project=self.PROJECT) + job = self._make_one(self.JOB_ID, self.QUERY, client) job._properties['status'] = { 'state': 'DONE', 'errorResult': { @@ -1677,11 +1815,284 @@ def test_cancelled(self): self.assertTrue(job.cancelled()) def test_done(self): - client = _Client(self.PROJECT) + client = _make_client(project=self.PROJECT) resource = self._makeResource(ended=True) job = self._get_target_class().from_api_repr(resource, client) self.assertTrue(job.done()) + def test_query_plan(self): + from google.cloud.bigquery.job import QueryPlanEntry + from google.cloud.bigquery.job import QueryPlanEntryStep + + plan_entries = [{ + 'name': 'NAME', + 'id': 1234, + 'waitRatioAvg': 2.71828, + 'waitRatioMax': 3.14159, + 'readRatioAvg': 1.41421, + 'readRatioMax': 1.73205, + 'computeRatioAvg': 0.69315, + 'computeRatioMax': 1.09861, + 'writeRatioAvg': 3.32193, + 'writeRatioMax': 2.30258, + 'recordsRead': '100', + 'recordsWritten': '1', + 'status': 'STATUS', + 'steps': [{ + 'kind': 'KIND', + 'substeps': ['SUBSTEP1', 'SUBSTEP2'], + }], + }] + client = _make_client(project=self.PROJECT) + job = self._make_one(self.JOB_ID, self.QUERY, client) + self.assertEqual(job.query_plan, []) + + statistics = job._properties['statistics'] = {} + self.assertEqual(job.query_plan, []) + + query_stats = statistics['query'] = {} + self.assertEqual(job.query_plan, []) + + query_stats['queryPlan'] = plan_entries + + self.assertEqual(len(job.query_plan), len(plan_entries)) + for found, expected in zip(job.query_plan, plan_entries): + self.assertIsInstance(found, QueryPlanEntry) + 
self.assertEqual(found.name, expected['name']) + self.assertEqual(found.entry_id, expected['id']) + self.assertEqual(found.wait_ratio_avg, expected['waitRatioAvg']) + self.assertEqual(found.wait_ratio_max, expected['waitRatioMax']) + self.assertEqual(found.read_ratio_avg, expected['readRatioAvg']) + self.assertEqual(found.read_ratio_max, expected['readRatioMax']) + self.assertEqual( + found.compute_ratio_avg, expected['computeRatioAvg']) + self.assertEqual( + found.compute_ratio_max, expected['computeRatioMax']) + self.assertEqual(found.write_ratio_avg, expected['writeRatioAvg']) + self.assertEqual(found.write_ratio_max, expected['writeRatioMax']) + self.assertEqual( + found.records_read, int(expected['recordsRead'])) + self.assertEqual( + found.records_written, int(expected['recordsWritten'])) + self.assertEqual(found.status, expected['status']) + + self.assertEqual(len(found.steps), len(expected['steps'])) + for f_step, e_step in zip(found.steps, expected['steps']): + self.assertIsInstance(f_step, QueryPlanEntryStep) + self.assertEqual(f_step.kind, e_step['kind']) + self.assertEqual(f_step.substeps, e_step['substeps']) + + def test_total_bytes_processed(self): + total_bytes = 1234 + client = _make_client(project=self.PROJECT) + job = self._make_one(self.JOB_ID, self.QUERY, client) + self.assertIsNone(job.total_bytes_processed) + + statistics = job._properties['statistics'] = {} + self.assertIsNone(job.total_bytes_processed) + + query_stats = statistics['query'] = {} + self.assertIsNone(job.total_bytes_processed) + + query_stats['totalBytesProcessed'] = str(total_bytes) + self.assertEqual(job.total_bytes_processed, total_bytes) + + def test_total_bytes_billed(self): + total_bytes = 1234 + client = _make_client(project=self.PROJECT) + job = self._make_one(self.JOB_ID, self.QUERY, client) + self.assertIsNone(job.total_bytes_billed) + + statistics = job._properties['statistics'] = {} + self.assertIsNone(job.total_bytes_billed) + + query_stats = statistics['query'] = {} + self.assertIsNone(job.total_bytes_billed) + + query_stats['totalBytesBilled'] = str(total_bytes) + self.assertEqual(job.total_bytes_billed, total_bytes) + + def test_billing_tier(self): + billing_tier = 1 + client = _make_client(project=self.PROJECT) + job = self._make_one(self.JOB_ID, self.QUERY, client) + self.assertIsNone(job.billing_tier) + + statistics = job._properties['statistics'] = {} + self.assertIsNone(job.billing_tier) + + query_stats = statistics['query'] = {} + self.assertIsNone(job.billing_tier) + + query_stats['billingTier'] = billing_tier + self.assertEqual(job.billing_tier, billing_tier) + + def test_cache_hit(self): + client = _make_client(project=self.PROJECT) + job = self._make_one(self.JOB_ID, self.QUERY, client) + self.assertIsNone(job.cache_hit) + + statistics = job._properties['statistics'] = {} + self.assertIsNone(job.cache_hit) + + query_stats = statistics['query'] = {} + self.assertIsNone(job.cache_hit) + + query_stats['cacheHit'] = True + self.assertTrue(job.cache_hit) + + def test_num_dml_affected_rows(self): + num_rows = 1234 + client = _make_client(project=self.PROJECT) + job = self._make_one(self.JOB_ID, self.QUERY, client) + self.assertIsNone(job.num_dml_affected_rows) + + statistics = job._properties['statistics'] = {} + self.assertIsNone(job.num_dml_affected_rows) + + query_stats = statistics['query'] = {} + self.assertIsNone(job.num_dml_affected_rows) + + query_stats['numDmlAffectedRows'] = str(num_rows) + self.assertEqual(job.num_dml_affected_rows, num_rows) + + def 
test_statement_type(self): + statement_type = 'SELECT' + client = _make_client(project=self.PROJECT) + job = self._make_one(self.JOB_ID, self.QUERY, client) + self.assertIsNone(job.statement_type) + + statistics = job._properties['statistics'] = {} + self.assertIsNone(job.statement_type) + + query_stats = statistics['query'] = {} + self.assertIsNone(job.statement_type) + + query_stats['statementType'] = statement_type + self.assertEqual(job.statement_type, statement_type) + + def test_referenced_tables(self): + from google.cloud.bigquery.table import TableReference + + ref_tables_resource = [{ + 'projectId': self.PROJECT, + 'datasetId': 'dataset', + 'tableId': 'local1', + }, { + + 'projectId': self.PROJECT, + 'datasetId': 'dataset', + 'tableId': 'local2', + }, { + + 'projectId': 'other-project-123', + 'datasetId': 'other-dataset', + 'tableId': 'other-table', + }] + client = _make_client(project=self.PROJECT) + job = self._make_one(self.JOB_ID, self.QUERY, client) + self.assertEqual(job.referenced_tables, []) + + statistics = job._properties['statistics'] = {} + self.assertEqual(job.referenced_tables, []) + + query_stats = statistics['query'] = {} + self.assertEqual(job.referenced_tables, []) + + query_stats['referencedTables'] = ref_tables_resource + + local1, local2, remote = job.referenced_tables + + self.assertIsInstance(local1, TableReference) + self.assertEqual(local1.table_id, 'local1') + self.assertEqual(local1.dataset_id, 'dataset') + self.assertEqual(local1.project, self.PROJECT) + + self.assertIsInstance(local2, TableReference) + self.assertEqual(local2.table_id, 'local2') + self.assertEqual(local2.dataset_id, 'dataset') + self.assertEqual(local2.project, self.PROJECT) + + self.assertIsInstance(remote, TableReference) + self.assertEqual(remote.table_id, 'other-table') + self.assertEqual(remote.dataset_id, 'other-dataset') + self.assertEqual(remote.project, 'other-project-123') + + def test_undeclared_query_paramters(self): + from google.cloud.bigquery.query import ArrayQueryParameter + from google.cloud.bigquery.query import ScalarQueryParameter + from google.cloud.bigquery.query import StructQueryParameter + + undeclared = [{ + 'name': 'my_scalar', + 'parameterType': { + 'type': 'STRING', + }, + 'parameterValue': { + 'value': 'value', + }, + }, { + 'name': 'my_array', + 'parameterType': { + 'type': 'ARRAY', + 'arrayType': { + 'type': 'INT64', + }, + }, + 'parameterValue': { + 'arrayValues': [ + {'value': '1066'}, + {'value': '1745'}, + ], + }, + }, { + 'name': 'my_struct', + 'parameterType': { + 'type': 'STRUCT', + 'structTypes': [{ + 'name': 'count', + 'type': { + 'type': 'INT64', + } + }], + }, + 'parameterValue': { + 'structValues': { + 'count': { + 'value': '123', + }, + } + }, + }] + client = _make_client(project=self.PROJECT) + job = self._make_one(self.JOB_ID, self.QUERY, client) + self.assertEqual(job.undeclared_query_paramters, []) + + statistics = job._properties['statistics'] = {} + self.assertEqual(job.undeclared_query_paramters, []) + + query_stats = statistics['query'] = {} + self.assertEqual(job.undeclared_query_paramters, []) + + query_stats['undeclaredQueryParamters'] = undeclared + + scalar, array, struct = job.undeclared_query_paramters + + self.assertIsInstance(scalar, ScalarQueryParameter) + self.assertEqual(scalar.name, 'my_scalar') + self.assertEqual(scalar.type_, 'STRING') + self.assertEqual(scalar.value, 'value') + + self.assertIsInstance(array, ArrayQueryParameter) + self.assertEqual(array.name, 'my_array') + self.assertEqual(array.array_type, 'INT64') 
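# Illustrative sketch, assuming the public query classes in
# google.cloud.bigquery.query: the same three parameters that
# from_api_repr is expected to produce here, built directly.
# Values mirror the resource dict used in this test.
from google.cloud.bigquery.query import ArrayQueryParameter
from google.cloud.bigquery.query import ScalarQueryParameter
from google.cloud.bigquery.query import StructQueryParameter

scalar_param = ScalarQueryParameter('my_scalar', 'STRING', 'value')
array_param = ArrayQueryParameter('my_array', 'INT64', [1066, 1745])
struct_param = StructQueryParameter(
    'my_struct', ScalarQueryParameter('count', 'INT64', 123))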
+ self.assertEqual(array.values, [1066, 1745]) + + self.assertIsInstance(struct, StructQueryParameter) + self.assertEqual(struct.name, 'my_struct') + self.assertEqual(struct.struct_types, {'count': 'INT64'}) + self.assertEqual(struct.struct_values, {'count': 123}) + def test_query_results(self): from google.cloud.bigquery.query import QueryResults @@ -1689,21 +2100,27 @@ def test_query_results(self): 'jobComplete': True, 'jobReference': { 'projectId': self.PROJECT, - 'jobId': self.JOB_NAME, + 'jobId': self.JOB_ID, }, } connection = _Connection(query_resource) - client = _Client(self.PROJECT, connection=connection) - job = self._make_one(self.JOB_NAME, self.QUERY, client) + client = _make_client(self.PROJECT, connection=connection) + job = self._make_one(self.JOB_ID, self.QUERY, client) results = job.query_results() self.assertIsInstance(results, QueryResults) def test_query_results_w_cached_value(self): from google.cloud.bigquery.query import QueryResults - client = _Client(self.PROJECT) - job = self._make_one(self.JOB_NAME, self.QUERY, client) - query_results = QueryResults(None, client) + client = _make_client(project=self.PROJECT) + job = self._make_one(self.JOB_ID, self.QUERY, client) + resource = { + 'jobReference': { + 'projectId': self.PROJECT, + 'jobId': self.JOB_ID, + }, + } + query_results = QueryResults(resource) job._query_results = query_results results = job.query_results() @@ -1715,11 +2132,11 @@ def test_result(self): 'jobComplete': True, 'jobReference': { 'projectId': self.PROJECT, - 'jobId': self.JOB_NAME, + 'jobId': self.JOB_ID, }, } connection = _Connection(query_resource, query_resource) - client = _Client(self.PROJECT, connection=connection) + client = _make_client(self.PROJECT, connection=connection) resource = self._makeResource(ended=True) job = self._get_target_class().from_api_repr(resource, client) @@ -1729,21 +2146,22 @@ def test_result(self): def test_result_invokes_begins(self): begun_resource = self._makeResource() - incomplete_resource = {'jobComplete': False} - query_resource = { - 'jobComplete': True, + incomplete_resource = { + 'jobComplete': False, 'jobReference': { 'projectId': self.PROJECT, - 'jobId': self.JOB_NAME, + 'jobId': self.JOB_ID, }, } + query_resource = copy.deepcopy(incomplete_resource) + query_resource['jobComplete'] = True done_resource = copy.deepcopy(begun_resource) done_resource['status'] = {'state': 'DONE'} connection = _Connection( begun_resource, incomplete_resource, query_resource, done_resource, query_resource) - client = _Client(self.PROJECT, connection=connection) - job = self._make_one(self.JOB_NAME, self.QUERY, client) + client = _make_client(project=self.PROJECT, connection=connection) + job = self._make_one(self.JOB_ID, self.QUERY, client) job.result() @@ -1753,11 +2171,39 @@ def test_result_invokes_begins(self): self.assertEqual(query_request['method'], 'GET') self.assertEqual(reload_request['method'], 'GET') + def test_result_w_timeout(self): + begun_resource = self._makeResource() + query_resource = { + 'jobComplete': True, + 'jobReference': { + 'projectId': self.PROJECT, + 'jobId': self.JOB_ID, + }, + } + done_resource = copy.deepcopy(begun_resource) + done_resource['status'] = {'state': 'DONE'} + connection = _Connection( + begun_resource, query_resource, done_resource) + client = _make_client(project=self.PROJECT, connection=connection) + job = self._make_one(self.JOB_ID, self.QUERY, client) + + job.result(timeout=1.0) + + self.assertEqual(len(connection._requested), 3) + begin_request, query_request, reload_request = 
connection._requested + self.assertEqual(begin_request['method'], 'POST') + self.assertEqual(query_request['method'], 'GET') + self.assertEqual( + query_request['path'], + '/projects/{}/queries/{}'.format(self.PROJECT, self.JOB_ID)) + self.assertEqual(query_request['query_params']['timeoutMs'], 900) + self.assertEqual(reload_request['method'], 'GET') + def test_result_error(self): from google.cloud import exceptions - client = _Client(self.PROJECT) - job = self._make_one(self.JOB_NAME, self.QUERY, client) + client = _make_client(project=self.PROJECT) + job = self._make_one(self.JOB_ID, self.QUERY, client) error_result = { 'debugInfo': 'DEBUG', 'location': 'LOCATION', @@ -1778,7 +2224,11 @@ def test_result_error(self): self.assertEqual(exc_info.exception.code, http_client.BAD_REQUEST) def test_begin_w_bound_client(self): + from google.cloud.bigquery.dataset import DatasetReference + from google.cloud.bigquery.job import QueryJobConfig + PATH = '/projects/%s/jobs' % (self.PROJECT,) + DS_ID = 'DATASET' RESOURCE = self._makeResource() # Ensure None for missing server-set props del RESOURCE['statistics']['creationTime'] @@ -1786,10 +2236,16 @@ def test_begin_w_bound_client(self): del RESOURCE['selfLink'] del RESOURCE['user_email'] conn = _Connection(RESOURCE) - client = _Client(project=self.PROJECT, connection=conn) - job = self._make_one(self.JOB_NAME, self.QUERY, client) + client = _make_client(project=self.PROJECT, connection=conn) + + config = QueryJobConfig() + config.default_dataset = DatasetReference(self.PROJECT, DS_ID) + job = self._make_one( + self.JOB_ID, self.QUERY, client, job_config=config) + + job._begin() - job.begin() + self.assertIsNone(job.default_dataset) self.assertEqual(job.udf_resources, []) self.assertEqual(len(conn._requested), 1) req = conn._requested[0] @@ -1798,24 +2254,29 @@ def test_begin_w_bound_client(self): SENT = { 'jobReference': { 'projectId': self.PROJECT, - 'jobId': self.JOB_NAME, + 'jobId': self.JOB_ID, }, 'configuration': { 'query': { - 'query': self.QUERY + 'query': self.QUERY, + 'useLegacySql': False, + 'defaultDataset': { + 'projectId': self.PROJECT, + 'datasetId': DS_ID, + }, }, }, } - self.assertEqual(req['data'], SENT) self._verifyResourceProperties(job, RESOURCE) + self.assertEqual(req['data'], SENT) def test_begin_w_alternate_client(self): - from google.cloud.bigquery.dataset import Dataset - from google.cloud.bigquery.dataset import Table + from google.cloud.bigquery.dataset import DatasetReference + from google.cloud.bigquery.job import QueryJobConfig PATH = '/projects/%s/jobs' % (self.PROJECT,) TABLE = 'TABLE' - DS_NAME = 'DATASET' + DS_ID = 'DATASET' RESOURCE = self._makeResource(ended=True) QUERY_CONFIGURATION = { 'query': self.QUERY, @@ -1823,11 +2284,11 @@ def test_begin_w_alternate_client(self): 'createDisposition': 'CREATE_NEVER', 'defaultDataset': { 'projectId': self.PROJECT, - 'datasetId': DS_NAME, + 'datasetId': DS_ID, }, 'destinationTable': { 'projectId': self.PROJECT, - 'datasetId': DS_NAME, + 'datasetId': DS_ID, 'tableId': TABLE, }, 'flattenResults': True, @@ -1836,33 +2297,34 @@ def test_begin_w_alternate_client(self): 'useLegacySql': True, 'writeDisposition': 'WRITE_TRUNCATE', 'maximumBillingTier': 4, - 'maximumBytesBilled': 123456 + 'maximumBytesBilled': '123456' } RESOURCE['configuration']['query'] = QUERY_CONFIGURATION + RESOURCE['configuration']['dryRun'] = True conn1 = _Connection() - client1 = _Client(project=self.PROJECT, connection=conn1) + client1 = _make_client(project=self.PROJECT, connection=conn1) conn2 = 
_Connection(RESOURCE) - client2 = _Client(project=self.PROJECT, connection=conn2) - job = self._make_one(self.JOB_NAME, self.QUERY, client1) - - dataset = Dataset(DS_NAME, client1) - table = Table(TABLE, dataset) - - job.allow_large_results = True - job.create_disposition = 'CREATE_NEVER' - job.default_dataset = dataset - job.destination = table - job.flatten_results = True - job.priority = 'INTERACTIVE' - job.use_query_cache = True - job.use_legacy_sql = True - job.dry_run = True - RESOURCE['configuration']['dryRun'] = True - job.write_disposition = 'WRITE_TRUNCATE' - job.maximum_billing_tier = 4 - job.maximum_bytes_billed = 123456 - - job.begin(client=client2) + client2 = _make_client(project=self.PROJECT, connection=conn2) + dataset_ref = DatasetReference(self.PROJECT, DS_ID) + table_ref = dataset_ref.table(TABLE) + + config = QueryJobConfig() + config.allow_large_results = True + config.create_disposition = 'CREATE_NEVER' + config.default_dataset = dataset_ref + config.destination = table_ref + config.dry_run = True + config.flatten_results = True + config.maximum_billing_tier = 4 + config.priority = 'INTERACTIVE' + config.use_legacy_sql = True + config.use_query_cache = True + config.write_disposition = 'WRITE_TRUNCATE' + config.maximum_bytes_billed = 123456 + job = self._make_one( + self.JOB_ID, self.QUERY, client1, job_config=config) + + job._begin(client=client2) self.assertEqual(len(conn1._requested), 0) self.assertEqual(len(conn2._requested), 1) @@ -1872,18 +2334,19 @@ def test_begin_w_alternate_client(self): SENT = { 'jobReference': { 'projectId': self.PROJECT, - 'jobId': self.JOB_NAME, + 'jobId': self.JOB_ID, }, 'configuration': { 'dryRun': True, 'query': QUERY_CONFIGURATION, }, } - self.assertEqual(req['data'], SENT) self._verifyResourceProperties(job, RESOURCE) + self.assertEqual(req['data'], SENT) def test_begin_w_udf(self): - from google.cloud.bigquery._helpers import UDFResource + from google.cloud.bigquery.job import QueryJobConfig + from google.cloud.bigquery.query import UDFResource RESOURCE_URI = 'gs://some-bucket/js/lib.js' INLINE_UDF_CODE = 'var someCode = "here";' @@ -1899,15 +2362,18 @@ def test_begin_w_udf(self): {'inlineCode': INLINE_UDF_CODE}, ] conn = _Connection(RESOURCE) - client = _Client(project=self.PROJECT, connection=conn) + client = _make_client(project=self.PROJECT, connection=conn) udf_resources = [ UDFResource("resourceUri", RESOURCE_URI), UDFResource("inlineCode", INLINE_UDF_CODE), ] - job = self._make_one(self.JOB_NAME, self.QUERY, client, - udf_resources=udf_resources) + config = QueryJobConfig() + config.udf_resources = udf_resources + config.use_legacy_sql = True + job = self._make_one( + self.JOB_ID, self.QUERY, client, job_config=config) - job.begin() + job._begin() self.assertEqual(len(conn._requested), 1) req = conn._requested[0] @@ -1917,11 +2383,12 @@ def test_begin_w_udf(self): SENT = { 'jobReference': { 'projectId': self.PROJECT, - 'jobId': self.JOB_NAME, + 'jobId': self.JOB_ID, }, 'configuration': { 'query': { 'query': self.QUERY, + 'useLegacySql': True, 'userDefinedFunctionResources': [ {'resourceUri': RESOURCE_URI}, {'inlineCode': INLINE_UDF_CODE}, @@ -1929,11 +2396,12 @@ def test_begin_w_udf(self): }, }, } - self.assertEqual(req['data'], SENT) self._verifyResourceProperties(job, RESOURCE) + self.assertEqual(req['data'], SENT) def test_begin_w_named_query_parameter(self): - from google.cloud.bigquery._helpers import ScalarQueryParameter + from google.cloud.bigquery.job import QueryJobConfig + from google.cloud.bigquery.query import 
ScalarQueryParameter query_parameters = [ScalarQueryParameter('foo', 'INT64', 123)] PATH = '/projects/%s/jobs' % (self.PROJECT,) @@ -1957,11 +2425,13 @@ def test_begin_w_named_query_parameter(self): }, ] conn = _Connection(RESOURCE) - client = _Client(project=self.PROJECT, connection=conn) - job = self._make_one(self.JOB_NAME, self.QUERY, client, - query_parameters=query_parameters) + client = _make_client(project=self.PROJECT, connection=conn) + jconfig = QueryJobConfig() + jconfig.query_parameters = query_parameters + job = self._make_one( + self.JOB_ID, self.QUERY, client, job_config=jconfig) - job.begin() + job._begin() self.assertEqual(len(conn._requested), 1) req = conn._requested[0] @@ -1971,21 +2441,23 @@ def test_begin_w_named_query_parameter(self): SENT = { 'jobReference': { 'projectId': self.PROJECT, - 'jobId': self.JOB_NAME, + 'jobId': self.JOB_ID, }, 'configuration': { 'query': { 'query': self.QUERY, + 'useLegacySql': False, 'parameterMode': 'NAMED', 'queryParameters': config['queryParameters'], }, }, } - self.assertEqual(req['data'], SENT) self._verifyResourceProperties(job, RESOURCE) + self.assertEqual(req['data'], SENT) def test_begin_w_positional_query_parameter(self): - from google.cloud.bigquery._helpers import ScalarQueryParameter + from google.cloud.bigquery.job import QueryJobConfig + from google.cloud.bigquery.query import ScalarQueryParameter query_parameters = [ScalarQueryParameter.positional('INT64', 123)] PATH = '/projects/%s/jobs' % (self.PROJECT,) @@ -2008,11 +2480,13 @@ def test_begin_w_positional_query_parameter(self): }, ] conn = _Connection(RESOURCE) - client = _Client(project=self.PROJECT, connection=conn) - job = self._make_one(self.JOB_NAME, self.QUERY, client, - query_parameters=query_parameters) + client = _make_client(project=self.PROJECT, connection=conn) + jconfig = QueryJobConfig() + jconfig.query_parameters = query_parameters + job = self._make_one( + self.JOB_ID, self.QUERY, client, job_config=jconfig) - job.begin() + job._begin() self.assertEqual(len(conn._requested), 1) req = conn._requested[0] @@ -2022,20 +2496,26 @@ def test_begin_w_positional_query_parameter(self): SENT = { 'jobReference': { 'projectId': self.PROJECT, - 'jobId': self.JOB_NAME, + 'jobId': self.JOB_ID, }, 'configuration': { 'query': { 'query': self.QUERY, + 'useLegacySql': False, 'parameterMode': 'POSITIONAL', 'queryParameters': config['queryParameters'], }, }, } - self.assertEqual(req['data'], SENT) self._verifyResourceProperties(job, RESOURCE) + self.assertEqual(req['data'], SENT) + + def test_begin_w_table_defs(self): + from google.cloud.bigquery.job import QueryJobConfig + from google.cloud.bigquery.external_config import ExternalConfig + from google.cloud.bigquery.external_config import BigtableColumn + from google.cloud.bigquery.external_config import BigtableColumnFamily - def test_dry_run_query(self): PATH = '/projects/%s/jobs' % (self.PROJECT,) RESOURCE = self._makeResource() # Ensure None for missing server-set props @@ -2043,13 +2523,99 @@ def test_dry_run_query(self): del RESOURCE['etag'] del RESOURCE['selfLink'] del RESOURCE['user_email'] + + bt_config = ExternalConfig('BIGTABLE') + bt_config.ignore_unknown_values = True + bt_config.options.read_rowkey_as_string = True + cf = BigtableColumnFamily() + cf.family_id = 'cf' + col = BigtableColumn() + col.field_name = 'fn' + cf.columns = [col] + bt_config.options.column_families = [cf] + BT_CONFIG_RESOURCE = { + 'sourceFormat': 'BIGTABLE', + 'ignoreUnknownValues': True, + 'bigtableOptions': { + 'readRowkeyAsString': 
True, + 'columnFamilies': [{ + 'familyId': 'cf', + 'columns': [{'fieldName': 'fn'}], + }], + }, + } + CSV_CONFIG_RESOURCE = { + 'sourceFormat': 'CSV', + 'maxBadRecords': 8, + 'csvOptions': { + 'allowJaggedRows': True, + }, + } + csv_config = ExternalConfig('CSV') + csv_config.max_bad_records = 8 + csv_config.options.allow_jagged_rows = True + bt_table = 'bigtable-table' + csv_table = 'csv-table' + RESOURCE['configuration']['query']['tableDefinitions'] = { + bt_table: BT_CONFIG_RESOURCE, + csv_table: CSV_CONFIG_RESOURCE, + } + want_resource = copy.deepcopy(RESOURCE) conn = _Connection(RESOURCE) - client = _Client(project=self.PROJECT, connection=conn) - job = self._make_one(self.JOB_NAME, self.QUERY, client) - job.dry_run = True + client = _make_client(project=self.PROJECT, connection=conn) + config = QueryJobConfig() + config.table_definitions = { + bt_table: bt_config, + csv_table: csv_config, + } + config.use_legacy_sql = True + job = self._make_one( + self.JOB_ID, self.QUERY, client, job_config=config) + + job._begin() + + self.assertEqual(len(conn._requested), 1) + req = conn._requested[0] + self.assertEqual(req['method'], 'POST') + self.assertEqual(req['path'], PATH) + SENT = { + 'jobReference': { + 'projectId': self.PROJECT, + 'jobId': self.JOB_ID, + }, + 'configuration': { + 'query': { + 'query': self.QUERY, + 'useLegacySql': True, + 'tableDefinitions': { + bt_table: BT_CONFIG_RESOURCE, + csv_table: CSV_CONFIG_RESOURCE, + }, + }, + }, + } + self._verifyResourceProperties(job, want_resource) + self.assertEqual(req['data'], SENT) + + def test_dry_run_query(self): + from google.cloud.bigquery.job import QueryJobConfig + + PATH = '/projects/%s/jobs' % (self.PROJECT,) + RESOURCE = self._makeResource() + # Ensure None for missing server-set props + del RESOURCE['statistics']['creationTime'] + del RESOURCE['etag'] + del RESOURCE['selfLink'] + del RESOURCE['user_email'] RESOURCE['configuration']['dryRun'] = True + conn = _Connection(RESOURCE) + client = _make_client(project=self.PROJECT, connection=conn) + config = QueryJobConfig() + config.dry_run = True + job = self._make_one( + self.JOB_ID, self.QUERY, client, job_config=config) - job.begin() + job._begin() self.assertEqual(job.udf_resources, []) self.assertEqual(len(conn._requested), 1) req = conn._requested[0] @@ -2058,23 +2624,24 @@ def test_dry_run_query(self): SENT = { 'jobReference': { 'projectId': self.PROJECT, - 'jobId': self.JOB_NAME, + 'jobId': self.JOB_ID, }, 'configuration': { 'query': { - 'query': self.QUERY + 'query': self.QUERY, + 'useLegacySql': False, }, 'dryRun': True, }, } - self.assertEqual(req['data'], SENT) self._verifyResourceProperties(job, RESOURCE) + self.assertEqual(req['data'], SENT) def test_exists_miss_w_bound_client(self): - PATH = '/projects/%s/jobs/%s' % (self.PROJECT, self.JOB_NAME) + PATH = '/projects/%s/jobs/%s' % (self.PROJECT, self.JOB_ID) conn = _Connection() - client = _Client(project=self.PROJECT, connection=conn) - job = self._make_one(self.JOB_NAME, self.QUERY, client) + client = _make_client(project=self.PROJECT, connection=conn) + job = self._make_one(self.JOB_ID, self.QUERY, client) self.assertFalse(job.exists()) @@ -2085,12 +2652,12 @@ def test_exists_miss_w_bound_client(self): self.assertEqual(req['query_params'], {'fields': 'id'}) def test_exists_hit_w_alternate_client(self): - PATH = '/projects/%s/jobs/%s' % (self.PROJECT, self.JOB_NAME) + PATH = '/projects/%s/jobs/%s' % (self.PROJECT, self.JOB_ID) conn1 = _Connection() - client1 = _Client(project=self.PROJECT, connection=conn1) + client1 
= _make_client(project=self.PROJECT, connection=conn1) conn2 = _Connection({}) - client2 = _Client(project=self.PROJECT, connection=conn2) - job = self._make_one(self.JOB_NAME, self.QUERY, client1) + client2 = _make_client(project=self.PROJECT, connection=conn2) + job = self._make_one(self.JOB_ID, self.QUERY, client1) self.assertTrue(job.exists(client=client2)) @@ -2102,24 +2669,24 @@ def test_exists_hit_w_alternate_client(self): self.assertEqual(req['query_params'], {'fields': 'id'}) def test_reload_w_bound_client(self): - from google.cloud.bigquery.dataset import Dataset - from google.cloud.bigquery.dataset import Table + from google.cloud.bigquery.dataset import DatasetReference + from google.cloud.bigquery.job import QueryJobConfig - PATH = '/projects/%s/jobs/%s' % (self.PROJECT, self.JOB_NAME) - DS_NAME = 'DATASET' + PATH = '/projects/%s/jobs/%s' % (self.PROJECT, self.JOB_ID) + DS_ID = 'DATASET' DEST_TABLE = 'dest_table' RESOURCE = self._makeResource() conn = _Connection(RESOURCE) - client = _Client(project=self.PROJECT, connection=conn) - job = self._make_one(self.JOB_NAME, None, client) - - dataset = Dataset(DS_NAME, client) - table = Table(DEST_TABLE, dataset) - job.destination = table + client = _make_client(project=self.PROJECT, connection=conn) + dataset_ref = DatasetReference(self.PROJECT, DS_ID) + table_ref = dataset_ref.table(DEST_TABLE) + config = QueryJobConfig() + config.destination = table_ref + job = self._make_one(self.JOB_ID, None, client, job_config=config) job.reload() - self.assertIsNone(job.destination) + self.assertNotEqual(job.destination, table_ref) self.assertEqual(len(conn._requested), 1) req = conn._requested[0] @@ -2128,21 +2695,21 @@ def test_reload_w_bound_client(self): self._verifyResourceProperties(job, RESOURCE) def test_reload_w_alternate_client(self): - PATH = '/projects/%s/jobs/%s' % (self.PROJECT, self.JOB_NAME) - DS_NAME = 'DATASET' + PATH = '/projects/%s/jobs/%s' % (self.PROJECT, self.JOB_ID) + DS_ID = 'DATASET' DEST_TABLE = 'dest_table' RESOURCE = self._makeResource() q_config = RESOURCE['configuration']['query'] q_config['destinationTable'] = { 'projectId': self.PROJECT, - 'datasetId': DS_NAME, + 'datasetId': DS_ID, 'tableId': DEST_TABLE, } conn1 = _Connection() - client1 = _Client(project=self.PROJECT, connection=conn1) + client1 = _make_client(project=self.PROJECT, connection=conn1) conn2 = _Connection(RESOURCE) - client2 = _Client(project=self.PROJECT, connection=conn2) - job = self._make_one(self.JOB_NAME, self.QUERY, client1) + client2 = _make_client(project=self.PROJECT, connection=conn2) + job = self._make_one(self.JOB_ID, self.QUERY, client1) job.reload(client=client2) @@ -2154,42 +2721,201 @@ def test_reload_w_alternate_client(self): self._verifyResourceProperties(job, RESOURCE) -class _Client(object): +class TestQueryPlanEntryStep(unittest.TestCase, _Base): + KIND = 'KIND' + SUBSTEPS = ('SUB1', 'SUB2') - def __init__(self, project='project', connection=None): - self.project = project - self._connection = connection + @staticmethod + def _get_target_class(): + from google.cloud.bigquery.job import QueryPlanEntryStep - def dataset(self, name): - from google.cloud.bigquery.dataset import Dataset + return QueryPlanEntryStep - return Dataset(name, client=self) + def _make_one(self, *args, **kw): + return self._get_target_class()(*args, **kw) - def _get_query_results(self, job_id): - from google.cloud.bigquery.query import QueryResults + def test_ctor(self): + step = self._make_one(self.KIND, self.SUBSTEPS) + self.assertEqual(step.kind, 
self.KIND) + self.assertEqual(step.substeps, list(self.SUBSTEPS)) + + def test_from_api_repr_empty(self): + klass = self._get_target_class() + step = klass.from_api_repr({}) + self.assertIsNone(step.kind) + self.assertEqual(step.substeps, []) + + def test_from_api_repr_normal(self): + resource = { + 'kind': self.KIND, + 'substeps': self.SUBSTEPS, + } + klass = self._get_target_class() + step = klass.from_api_repr(resource) + self.assertEqual(step.kind, self.KIND) + self.assertEqual(step.substeps, list(self.SUBSTEPS)) + + def test___eq___mismatched_type(self): + step = self._make_one(self.KIND, self.SUBSTEPS) + self.assertNotEqual(step, object()) + + def test___eq___mismatch_kind(self): + step = self._make_one(self.KIND, self.SUBSTEPS) + other = self._make_one('OTHER', self.SUBSTEPS) + self.assertNotEqual(step, other) + + def test___eq___mismatch_substeps(self): + step = self._make_one(self.KIND, self.SUBSTEPS) + other = self._make_one(self.KIND, ()) + self.assertNotEqual(step, other) + + def test___eq___hit(self): + step = self._make_one(self.KIND, self.SUBSTEPS) + other = self._make_one(self.KIND, self.SUBSTEPS) + self.assertEqual(step, other) + + +class TestQueryPlanEntry(unittest.TestCase, _Base): + NAME = 'NAME' + ENTRY_ID = 1234 + WAIT_RATIO_AVG = 2.71828 + WAIT_RATIO_MAX = 3.14159 + READ_RATIO_AVG = 1.41421 + READ_RATIO_MAX = 1.73205 + COMPUTE_RATIO_AVG = 0.69315 + COMPUTE_RATIO_MAX = 1.09861 + WRITE_RATIO_AVG = 3.32193 + WRITE_RATIO_MAX = 2.30258 + RECORDS_READ = 100 + RECORDS_WRITTEN = 1 + STATUS = 'STATUS' + + @staticmethod + def _get_target_class(): + from google.cloud.bigquery.job import QueryPlanEntry + + return QueryPlanEntry + + def _make_one(self, *args, **kw): + return self._get_target_class()(*args, **kw) + + def test_ctor(self): + from google.cloud.bigquery.job import QueryPlanEntryStep + + steps = [QueryPlanEntryStep( + kind=TestQueryPlanEntryStep.KIND, + substeps=TestQueryPlanEntryStep.SUBSTEPS)] + entry = self._make_one( + name=self.NAME, + entry_id=self.ENTRY_ID, + wait_ratio_avg=self.WAIT_RATIO_AVG, + wait_ratio_max=self.WAIT_RATIO_MAX, + read_ratio_avg=self.READ_RATIO_AVG, + read_ratio_max=self.READ_RATIO_MAX, + compute_ratio_avg=self.COMPUTE_RATIO_AVG, + compute_ratio_max=self.COMPUTE_RATIO_MAX, + write_ratio_avg=self.WRITE_RATIO_AVG, + write_ratio_max=self.WRITE_RATIO_MAX, + records_read=self.RECORDS_READ, + records_written=self.RECORDS_WRITTEN, + status=self.STATUS, + steps=steps, + ) + self.assertEqual(entry.name, self.NAME) + self.assertEqual(entry.entry_id, self.ENTRY_ID) + self.assertEqual(entry.wait_ratio_avg, self.WAIT_RATIO_AVG) + self.assertEqual(entry.wait_ratio_max, self.WAIT_RATIO_MAX) + self.assertEqual(entry.read_ratio_avg, self.READ_RATIO_AVG) + self.assertEqual(entry.read_ratio_max, self.READ_RATIO_MAX) + self.assertEqual(entry.compute_ratio_avg, self.COMPUTE_RATIO_AVG) + self.assertEqual(entry.compute_ratio_max, self.COMPUTE_RATIO_MAX) + self.assertEqual(entry.write_ratio_avg, self.WRITE_RATIO_AVG) + self.assertEqual(entry.write_ratio_max, self.WRITE_RATIO_MAX) + self.assertEqual(entry.records_read, self.RECORDS_READ) + self.assertEqual(entry.records_written, self.RECORDS_WRITTEN) + self.assertEqual(entry.status, self.STATUS) + self.assertEqual(entry.steps, steps) + + def test_from_api_repr_empty(self): + klass = self._get_target_class() + + entry = klass.from_api_repr({}) + + self.assertIsNone(entry.name) + self.assertIsNone(entry.entry_id) + self.assertIsNone(entry.wait_ratio_avg) + self.assertIsNone(entry.wait_ratio_max) + 
self.assertIsNone(entry.read_ratio_avg) + self.assertIsNone(entry.read_ratio_max) + self.assertIsNone(entry.compute_ratio_avg) + self.assertIsNone(entry.compute_ratio_max) + self.assertIsNone(entry.write_ratio_avg) + self.assertIsNone(entry.write_ratio_max) + self.assertIsNone(entry.records_read) + self.assertIsNone(entry.records_written) + self.assertIsNone(entry.status) + self.assertEqual(entry.steps, []) + + def test_from_api_repr_normal(self): + from google.cloud.bigquery.job import QueryPlanEntryStep + + steps = [QueryPlanEntryStep( + kind=TestQueryPlanEntryStep.KIND, + substeps=TestQueryPlanEntryStep.SUBSTEPS)] + resource = { + 'name': self.NAME, + 'id': self.ENTRY_ID, + 'waitRatioAvg': self.WAIT_RATIO_AVG, + 'waitRatioMax': self.WAIT_RATIO_MAX, + 'readRatioAvg': self.READ_RATIO_AVG, + 'readRatioMax': self.READ_RATIO_MAX, + 'computeRatioAvg': self.COMPUTE_RATIO_AVG, + 'computeRatioMax': self.COMPUTE_RATIO_MAX, + 'writeRatioAvg': self.WRITE_RATIO_AVG, + 'writeRatioMax': self.WRITE_RATIO_MAX, + 'recordsRead': str(self.RECORDS_READ), + 'recordsWritten': str(self.RECORDS_WRITTEN), + 'status': self.STATUS, + 'steps': [{ + 'kind': TestQueryPlanEntryStep.KIND, + 'substeps': TestQueryPlanEntryStep.SUBSTEPS, + }] + } + klass = self._get_target_class() - resource = self._connection.api_request(method='GET') - return QueryResults.from_api_repr(resource, self) + entry = klass.from_api_repr(resource) + self.assertEqual(entry.name, self.NAME) + self.assertEqual(entry.entry_id, self.ENTRY_ID) + self.assertEqual(entry.wait_ratio_avg, self.WAIT_RATIO_AVG) + self.assertEqual(entry.wait_ratio_max, self.WAIT_RATIO_MAX) + self.assertEqual(entry.read_ratio_avg, self.READ_RATIO_AVG) + self.assertEqual(entry.read_ratio_max, self.READ_RATIO_MAX) + self.assertEqual(entry.compute_ratio_avg, self.COMPUTE_RATIO_AVG) + self.assertEqual(entry.compute_ratio_max, self.COMPUTE_RATIO_MAX) + self.assertEqual(entry.write_ratio_avg, self.WRITE_RATIO_AVG) + self.assertEqual(entry.write_ratio_max, self.WRITE_RATIO_MAX) + self.assertEqual(entry.records_read, self.RECORDS_READ) + self.assertEqual(entry.records_written, self.RECORDS_WRITTEN) + self.assertEqual(entry.status, self.STATUS) + self.assertEqual(entry.steps, steps) class _Table(object): - def __init__(self, name=None): - self._name = name + def __init__(self, table_id=None): + self._table_id = table_id @property - def name(self): - if self._name is not None: - return self._name - return TestLoadTableFromStorageJob.TABLE_NAME + def table_id(self): + return TestLoadJob.TABLE_ID @property def project(self): - return TestLoadTableFromStorageJob.PROJECT + return TestLoadJob.PROJECT @property - def dataset_name(self): - return TestLoadTableFromStorageJob.DS_NAME + def dataset_id(self): + return TestLoadJob.DS_ID class _Connection(object): diff --git a/bigquery/tests/unit/test_query.py b/bigquery/tests/unit/test_query.py index 76d5057f6450..e5c78caf3b0b 100644 --- a/bigquery/tests/unit/test_query.py +++ b/bigquery/tests/unit/test_query.py @@ -12,15 +12,970 @@ # See the License for the specific language governing permissions and # limitations under the License. 
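The added tests below exercise the query-parameter and UDF helpers that now live in google.cloud.bigquery.query. A minimal usage sketch of that surface, assuming the 0.28-era client API; the Shakespeare sample table and the @corpus parameter are illustrative and not taken from the tests:

from google.cloud import bigquery
from google.cloud.bigquery import QueryJobConfig
from google.cloud.bigquery import ScalarQueryParameter

client = bigquery.Client()

job_config = QueryJobConfig()
job_config.query_parameters = [
    ScalarQueryParameter('corpus', 'STRING', 'hamlet'),
]

# Standard SQL refers to the named parameter as @corpus.
query_job = client.query(
    'SELECT word, word_count '
    'FROM `bigquery-public-data.samples.shakespeare` '
    'WHERE corpus = @corpus '
    'ORDER BY word_count DESC LIMIT 10',
    job_config=job_config)  # API request

for row in query_job.result():  # Waits for the query to finish.
    print(row.word, row.word_count)

Legacy-SQL UDFs attach the same way: set job_config.use_legacy_sql = True and job_config.udf_resources = [UDFResource('resourceUri', 'gs://some-bucket/js/lib.js')], which is what the job tests above serialize as userDefinedFunctionResources.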
+import datetime import unittest +import mock + + +class Test_UDFResource(unittest.TestCase): + + @staticmethod + def _get_target_class(): + from google.cloud.bigquery.query import UDFResource + + return UDFResource + + def _make_one(self, *args, **kw): + return self._get_target_class()(*args, **kw) + + def test_ctor(self): + udf = self._make_one('resourceUri', 'gs://some_bucket/some_file') + self.assertEqual(udf.udf_type, 'resourceUri') + self.assertEqual(udf.value, 'gs://some_bucket/some_file') + + def test___eq__(self): + udf = self._make_one('resourceUri', 'gs://some_bucket/some_file') + self.assertEqual(udf, udf) + self.assertNotEqual(udf, object()) + wrong_val = self._make_one( + 'resourceUri', 'gs://some_bucket/other_file') + self.assertNotEqual(udf, wrong_val) + wrong_type = self._make_one('inlineCode', udf.value) + self.assertNotEqual(udf, wrong_type) + + +class Test__AbstractQueryParameter(unittest.TestCase): + + @staticmethod + def _get_target_class(): + from google.cloud.bigquery.query import _AbstractQueryParameter + + return _AbstractQueryParameter + + def _make_one(self, *args, **kw): + return self._get_target_class()(*args, **kw) + + def test_from_api_virtual(self): + klass = self._get_target_class() + with self.assertRaises(NotImplementedError): + klass.from_api_repr({}) + + def test_to_api_virtual(self): + param = self._make_one() + with self.assertRaises(NotImplementedError): + param.to_api_repr() + + +class Test_ScalarQueryParameter(unittest.TestCase): + + @staticmethod + def _get_target_class(): + from google.cloud.bigquery.query import ScalarQueryParameter + + return ScalarQueryParameter + + def _make_one(self, *args, **kw): + return self._get_target_class()(*args, **kw) + + def test_ctor(self): + param = self._make_one(name='foo', type_='INT64', value=123) + self.assertEqual(param.name, 'foo') + self.assertEqual(param.type_, 'INT64') + self.assertEqual(param.value, 123) + + def test___eq__(self): + param = self._make_one(name='foo', type_='INT64', value=123) + self.assertEqual(param, param) + self.assertNotEqual(param, object()) + alias = self._make_one(name='bar', type_='INT64', value=123) + self.assertNotEqual(param, alias) + wrong_type = self._make_one(name='foo', type_='FLOAT64', value=123.0) + self.assertNotEqual(param, wrong_type) + wrong_val = self._make_one(name='foo', type_='INT64', value=234) + self.assertNotEqual(param, wrong_val) + + def test_positional(self): + klass = self._get_target_class() + param = klass.positional(type_='INT64', value=123) + self.assertEqual(param.name, None) + self.assertEqual(param.type_, 'INT64') + self.assertEqual(param.value, 123) + + def test_from_api_repr_w_name(self): + RESOURCE = { + 'name': 'foo', + 'parameterType': { + 'type': 'INT64', + }, + 'parameterValue': { + 'value': 123, + }, + } + klass = self._get_target_class() + param = klass.from_api_repr(RESOURCE) + self.assertEqual(param.name, 'foo') + self.assertEqual(param.type_, 'INT64') + self.assertEqual(param.value, 123) + + def test_from_api_repr_wo_name(self): + RESOURCE = { + 'parameterType': { + 'type': 'INT64', + }, + 'parameterValue': { + 'value': '123', + }, + } + klass = self._get_target_class() + param = klass.from_api_repr(RESOURCE) + self.assertEqual(param.name, None) + self.assertEqual(param.type_, 'INT64') + self.assertEqual(param.value, 123) + + def test_to_api_repr_w_name(self): + EXPECTED = { + 'name': 'foo', + 'parameterType': { + 'type': 'INT64', + }, + 'parameterValue': { + 'value': '123', + }, + } + param = self._make_one(name='foo', type_='INT64', 
value=123) + self.assertEqual(param.to_api_repr(), EXPECTED) + + def test_to_api_repr_wo_name(self): + EXPECTED = { + 'parameterType': { + 'type': 'INT64', + }, + 'parameterValue': { + 'value': '123', + }, + } + klass = self._get_target_class() + param = klass.positional(type_='INT64', value=123) + self.assertEqual(param.to_api_repr(), EXPECTED) + + def test_to_api_repr_w_float(self): + EXPECTED = { + 'parameterType': { + 'type': 'FLOAT64', + }, + 'parameterValue': { + 'value': 12.345, + }, + } + klass = self._get_target_class() + param = klass.positional(type_='FLOAT64', value=12.345) + self.assertEqual(param.to_api_repr(), EXPECTED) + + def test_to_api_repr_w_bool(self): + EXPECTED = { + 'parameterType': { + 'type': 'BOOL', + }, + 'parameterValue': { + 'value': 'false', + }, + } + klass = self._get_target_class() + param = klass.positional(type_='BOOL', value=False) + self.assertEqual(param.to_api_repr(), EXPECTED) + + def test_to_api_repr_w_timestamp_datetime(self): + from google.cloud._helpers import UTC + + STAMP = '2016-12-20 15:58:27.339328+00:00' + when = datetime.datetime(2016, 12, 20, 15, 58, 27, 339328, tzinfo=UTC) + EXPECTED = { + 'parameterType': { + 'type': 'TIMESTAMP', + }, + 'parameterValue': { + 'value': STAMP, + }, + } + klass = self._get_target_class() + param = klass.positional(type_='TIMESTAMP', value=when) + self.assertEqual(param.to_api_repr(), EXPECTED) + + def test_to_api_repr_w_timestamp_micros(self): + from google.cloud._helpers import _microseconds_from_datetime + + now = datetime.datetime.utcnow() + seconds = _microseconds_from_datetime(now) / 1.0e6 + EXPECTED = { + 'parameterType': { + 'type': 'TIMESTAMP', + }, + 'parameterValue': { + 'value': seconds, + }, + } + klass = self._get_target_class() + param = klass.positional(type_='TIMESTAMP', value=seconds) + self.assertEqual(param.to_api_repr(), EXPECTED) + + def test_to_api_repr_w_datetime_datetime(self): + from google.cloud._helpers import _datetime_to_rfc3339 + + now = datetime.datetime.utcnow() + EXPECTED = { + 'parameterType': { + 'type': 'DATETIME', + }, + 'parameterValue': { + 'value': _datetime_to_rfc3339(now)[:-1], # strip trailing 'Z' + }, + } + klass = self._get_target_class() + param = klass.positional(type_='DATETIME', value=now) + self.assertEqual(param.to_api_repr(), EXPECTED) + + def test_to_api_repr_w_datetime_string(self): + from google.cloud._helpers import _datetime_to_rfc3339 + + now = datetime.datetime.utcnow() + now_str = _datetime_to_rfc3339(now) + EXPECTED = { + 'parameterType': { + 'type': 'DATETIME', + }, + 'parameterValue': { + 'value': now_str, + }, + } + klass = self._get_target_class() + param = klass.positional(type_='DATETIME', value=now_str) + self.assertEqual(param.to_api_repr(), EXPECTED) + + def test_to_api_repr_w_date_date(self): + today = datetime.date.today() + EXPECTED = { + 'parameterType': { + 'type': 'DATE', + }, + 'parameterValue': { + 'value': today.isoformat(), + }, + } + klass = self._get_target_class() + param = klass.positional(type_='DATE', value=today) + self.assertEqual(param.to_api_repr(), EXPECTED) + + def test_to_api_repr_w_date_string(self): + today = datetime.date.today() + today_str = today.isoformat() + EXPECTED = { + 'parameterType': { + 'type': 'DATE', + }, + 'parameterValue': { + 'value': today_str, + }, + } + klass = self._get_target_class() + param = klass.positional(type_='DATE', value=today_str) + self.assertEqual(param.to_api_repr(), EXPECTED) + + def test_to_api_repr_w_unknown_type(self): + EXPECTED = { + 'parameterType': { + 'type':
'UNKNOWN', + }, + 'parameterValue': { + 'value': 'unknown', + }, + } + klass = self._get_target_class() + param = klass.positional(type_='UNKNOWN', value='unknown') + self.assertEqual(param.to_api_repr(), EXPECTED) + + def test___eq___wrong_type(self): + field = self._make_one('test', 'STRING', 'value') + other = object() + self.assertNotEqual(field, other) + self.assertEqual(field, mock.ANY) + + def test___eq___name_mismatch(self): + field = self._make_one('test', 'STRING', 'value') + other = self._make_one('other', 'STRING', 'value') + self.assertNotEqual(field, other) + + def test___eq___field_type_mismatch(self): + field = self._make_one('test', 'STRING', None) + other = self._make_one('test', 'INT64', None) + self.assertNotEqual(field, other) + + def test___eq___value_mismatch(self): + field = self._make_one('test', 'STRING', 'hello') + other = self._make_one('test', 'STRING', 'world') + self.assertNotEqual(field, other) + + def test___eq___hit(self): + field = self._make_one('test', 'STRING', 'gotcha') + other = self._make_one('test', 'STRING', 'gotcha') + self.assertEqual(field, other) + + def test___ne___wrong_type(self): + field = self._make_one('toast', 'INT64', 13) + other = object() + self.assertNotEqual(field, other) + self.assertEqual(field, mock.ANY) + + def test___ne___same_value(self): + field1 = self._make_one('test', 'INT64', 12) + field2 = self._make_one('test', 'INT64', 12) + # unittest ``assertEqual`` uses ``==`` not ``!=``. + comparison_val = (field1 != field2) + self.assertFalse(comparison_val) + + def test___ne___different_values(self): + field1 = self._make_one('test', 'INT64', 11) + field2 = self._make_one('test', 'INT64', 12) + self.assertNotEqual(field1, field2) + + def test___repr__(self): + field1 = self._make_one('field1', 'STRING', 'value') + expected = "ScalarQueryParameter('field1', 'STRING', 'value')" + self.assertEqual(repr(field1), expected) + + +def _make_subparam(name, type_, value): + from google.cloud.bigquery.query import ScalarQueryParameter + + return ScalarQueryParameter(name, type_, value) + + +class Test_ArrayQueryParameter(unittest.TestCase): + + @staticmethod + def _get_target_class(): + from google.cloud.bigquery.query import ArrayQueryParameter + + return ArrayQueryParameter + + def _make_one(self, *args, **kw): + return self._get_target_class()(*args, **kw) + + def test_ctor(self): + param = self._make_one(name='foo', array_type='INT64', values=[1, 2]) + self.assertEqual(param.name, 'foo') + self.assertEqual(param.array_type, 'INT64') + self.assertEqual(param.values, [1, 2]) + + def test___eq__(self): + param = self._make_one(name='foo', array_type='INT64', values=[123]) + self.assertEqual(param, param) + self.assertNotEqual(param, object()) + alias = self._make_one(name='bar', array_type='INT64', values=[123]) + self.assertNotEqual(param, alias) + wrong_type = self._make_one( + name='foo', array_type='FLOAT64', values=[123.0]) + self.assertNotEqual(param, wrong_type) + wrong_val = self._make_one( + name='foo', array_type='INT64', values=[234]) + self.assertNotEqual(param, wrong_val) + + def test_positional(self): + klass = self._get_target_class() + param = klass.positional(array_type='INT64', values=[1, 2]) + self.assertEqual(param.name, None) + self.assertEqual(param.array_type, 'INT64') + self.assertEqual(param.values, [1, 2]) + + def test_from_api_repr_w_name(self): + RESOURCE = { + 'name': 'foo', + 'parameterType': { + 'type': 'ARRAY', + 'arrayType': { + 'type': 'INT64', + }, + }, + 'parameterValue': { + 'arrayValues': [ + { + 
'value': '1', + }, + { + 'value': '2' + }, + ], + }, + } + klass = self._get_target_class() + param = klass.from_api_repr(RESOURCE) + self.assertEqual(param.name, 'foo') + self.assertEqual(param.array_type, 'INT64') + self.assertEqual(param.values, [1, 2]) + + def test_from_api_repr_wo_name(self): + RESOURCE = { + 'parameterType': { + 'type': 'ARRAY', + 'arrayType': { + 'type': 'INT64', + }, + }, + 'parameterValue': { + 'arrayValues': [ + { + 'value': '1', + }, + { + 'value': '2' + }, + ], + }, + } + klass = self._get_target_class() + param = klass.from_api_repr(RESOURCE) + self.assertEqual(param.name, None) + self.assertEqual(param.array_type, 'INT64') + self.assertEqual(param.values, [1, 2]) + + def test_from_api_repr_w_struct_type(self): + from google.cloud.bigquery.query import StructQueryParameter + + RESOURCE = { + 'parameterType': { + 'type': 'ARRAY', + 'arrayType': { + 'type': 'STRUCT', + 'structTypes': [ + { + 'name': 'name', + 'type': {'type': 'STRING'}, + }, + { + 'name': 'age', + 'type': {'type': 'INT64'}, + }, + ], + }, + }, + 'parameterValue': { + 'arrayValues': [ + { + 'structValues': { + 'name': {'value': 'Phred Phlyntstone'}, + 'age': {'value': '32'}, + }, + }, + { + 'structValues': { + 'name': { + 'value': 'Bharney Rhubbyl', + }, + 'age': {'value': '31'}, + }, + }, + ], + }, + } + + klass = self._get_target_class() + param = klass.from_api_repr(RESOURCE) + + phred = StructQueryParameter.positional( + _make_subparam('name', 'STRING', 'Phred Phlyntstone'), + _make_subparam('age', 'INT64', 32)) + bharney = StructQueryParameter.positional( + _make_subparam('name', 'STRING', 'Bharney Rhubbyl'), + _make_subparam('age', 'INT64', 31)) + self.assertEqual(param.array_type, 'STRUCT') + self.assertEqual(param.values, [phred, bharney]) + + def test_to_api_repr_w_name(self): + EXPECTED = { + 'name': 'foo', + 'parameterType': { + 'type': 'ARRAY', + 'arrayType': { + 'type': 'INT64', + }, + }, + 'parameterValue': { + 'arrayValues': [ + { + 'value': '1', + }, + { + 'value': '2' + }, + ], + }, + } + param = self._make_one(name='foo', array_type='INT64', values=[1, 2]) + self.assertEqual(param.to_api_repr(), EXPECTED) + + def test_to_api_repr_wo_name(self): + EXPECTED = { + 'parameterType': { + 'type': 'ARRAY', + 'arrayType': { + 'type': 'INT64', + }, + }, + 'parameterValue': { + 'arrayValues': [ + { + 'value': '1', + }, + { + 'value': '2' + }, + ], + }, + } + klass = self._get_target_class() + param = klass.positional(array_type='INT64', values=[1, 2]) + self.assertEqual(param.to_api_repr(), EXPECTED) + + def test_to_api_repr_w_unknown_type(self): + EXPECTED = { + 'parameterType': { + 'type': 'ARRAY', + 'arrayType': { + 'type': 'UNKNOWN', + }, + }, + 'parameterValue': { + 'arrayValues': [ + { + 'value': 'unknown', + } + ], + }, + } + klass = self._get_target_class() + param = klass.positional(array_type='UNKNOWN', values=['unknown']) + self.assertEqual(param.to_api_repr(), EXPECTED) + + def test_to_api_repr_w_record_type(self): + from google.cloud.bigquery.query import StructQueryParameter + + EXPECTED = { + 'parameterType': { + 'type': 'ARRAY', + 'arrayType': { + 'type': 'STRUCT', + 'structTypes': [ + {'name': 'foo', 'type': {'type': 'STRING'}}, + {'name': 'bar', 'type': {'type': 'INT64'}}, + ], + }, + }, + 'parameterValue': { + 'arrayValues': [{ + 'structValues': { + 'foo': {'value': 'Foo'}, + 'bar': {'value': '123'}, + } + }] + }, + } + one = _make_subparam('foo', 'STRING', 'Foo') + another = _make_subparam('bar', 'INT64', 123) + struct = StructQueryParameter.positional(one, another) + 
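+ # Note: 'RECORD' is accepted here as a synonym for 'STRUCT'; the
+ # EXPECTED payload above shows the arrayType rendered as STRUCT and
+ # each element serialized via its structValues.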
klass = self._get_target_class() + param = klass.positional(array_type='RECORD', values=[struct]) + self.assertEqual(param.to_api_repr(), EXPECTED) + + def test___eq___wrong_type(self): + field = self._make_one('test', 'STRING', ['value']) + other = object() + self.assertNotEqual(field, other) + self.assertEqual(field, mock.ANY) + + def test___eq___name_mismatch(self): + field = self._make_one('field', 'STRING', ['value']) + other = self._make_one('other', 'STRING', ['value']) + self.assertNotEqual(field, other) + + def test___eq___field_type_mismatch(self): + field = self._make_one('test', 'STRING', []) + other = self._make_one('test', 'INT64', []) + self.assertNotEqual(field, other) + + def test___eq___value_mismatch(self): + field = self._make_one('test', 'STRING', ['hello']) + other = self._make_one('test', 'STRING', ['hello', 'world']) + self.assertNotEqual(field, other) + + def test___eq___hit(self): + field = self._make_one('test', 'STRING', ['gotcha']) + other = self._make_one('test', 'STRING', ['gotcha']) + self.assertEqual(field, other) + + def test___ne___wrong_type(self): + field = self._make_one('toast', 'INT64', [13]) + other = object() + self.assertNotEqual(field, other) + self.assertEqual(field, mock.ANY) + + def test___ne___same_value(self): + field1 = self._make_one('test', 'INT64', [12]) + field2 = self._make_one('test', 'INT64', [12]) + # unittest ``assertEqual`` uses ``==`` not ``!=``. + comparison_val = (field1 != field2) + self.assertFalse(comparison_val) + + def test___ne___different_values(self): + field1 = self._make_one('test', 'INT64', [11]) + field2 = self._make_one('test', 'INT64', [12]) + self.assertNotEqual(field1, field2) + + def test___repr__(self): + field1 = self._make_one('field1', 'STRING', ['value']) + expected = "ArrayQueryParameter('field1', 'STRING', ['value'])" + self.assertEqual(repr(field1), expected) + + +class Test_StructQueryParameter(unittest.TestCase): + + @staticmethod + def _get_target_class(): + from google.cloud.bigquery.query import StructQueryParameter + + return StructQueryParameter + + def _make_one(self, *args, **kw): + return self._get_target_class()(*args, **kw) + + def test_ctor(self): + sub_1 = _make_subparam('bar', 'INT64', 123) + sub_2 = _make_subparam('baz', 'STRING', 'abc') + param = self._make_one('foo', sub_1, sub_2) + self.assertEqual(param.name, 'foo') + self.assertEqual(param.struct_types, {'bar': 'INT64', 'baz': 'STRING'}) + self.assertEqual(param.struct_values, {'bar': 123, 'baz': 'abc'}) + + def test___eq__(self): + sub_1 = _make_subparam('bar', 'INT64', 123) + sub_2 = _make_subparam('baz', 'STRING', 'abc') + sub_3 = _make_subparam('baz', 'STRING', 'def') + sub_1_float = _make_subparam('bar', 'FLOAT64', 123.0) + param = self._make_one('foo', sub_1, sub_2) + self.assertEqual(param, param) + self.assertNotEqual(param, object()) + alias = self._make_one('bar', sub_1, sub_2) + self.assertNotEqual(param, alias) + wrong_type = self._make_one('foo', sub_1_float, sub_2) + self.assertNotEqual(param, wrong_type) + wrong_val = self._make_one('foo', sub_2, sub_3) + self.assertNotEqual(param, wrong_val) + + def test_positional(self): + sub_1 = _make_subparam('bar', 'INT64', 123) + sub_2 = _make_subparam('baz', 'STRING', 'abc') + klass = self._get_target_class() + param = klass.positional(sub_1, sub_2) + self.assertEqual(param.name, None) + self.assertEqual(param.struct_types, {'bar': 'INT64', 'baz': 'STRING'}) + self.assertEqual(param.struct_values, {'bar': 123, 'baz': 'abc'}) + + def test_from_api_repr_w_name(self): + RESOURCE 
= { + 'name': 'foo', + 'parameterType': { + 'type': 'STRUCT', + 'structTypes': [ + {'name': 'bar', 'type': {'type': 'INT64'}}, + {'name': 'baz', 'type': {'type': 'STRING'}}, + ], + }, + 'parameterValue': { + 'structValues': { + 'bar': {'value': 123}, + 'baz': {'value': 'abc'}, + }, + }, + } + klass = self._get_target_class() + param = klass.from_api_repr(RESOURCE) + self.assertEqual(param.name, 'foo') + self.assertEqual(param.struct_types, {'bar': 'INT64', 'baz': 'STRING'}) + self.assertEqual(param.struct_values, {'bar': 123, 'baz': 'abc'}) + + def test_from_api_repr_wo_name(self): + RESOURCE = { + 'parameterType': { + 'type': 'STRUCT', + 'structTypes': [ + {'name': 'bar', 'type': {'type': 'INT64'}}, + {'name': 'baz', 'type': {'type': 'STRING'}}, + ], + }, + 'parameterValue': { + 'structValues': { + 'bar': {'value': 123}, + 'baz': {'value': 'abc'}, + }, + }, + } + klass = self._get_target_class() + param = klass.from_api_repr(RESOURCE) + self.assertEqual(param.name, None) + self.assertEqual(param.struct_types, {'bar': 'INT64', 'baz': 'STRING'}) + self.assertEqual(param.struct_values, {'bar': 123, 'baz': 'abc'}) + + def test_from_api_repr_w_nested_array(self): + from google.cloud.bigquery.query import ArrayQueryParameter + + RESOURCE = { + 'name': 'foo', + 'parameterType': { + 'type': 'STRUCT', + 'structTypes': [ + {'name': 'bar', 'type': {'type': 'STRING'}}, + {'name': 'baz', 'type': { + 'type': 'ARRAY', + 'arrayType': {'type': 'INT64'}, + }}, + ], + }, + 'parameterValue': { + 'structValues': { + 'bar': {'value': 'abc'}, + 'baz': {'arrayValues': [ + {'value': '123'}, + {'value': '456'}, + ]}, + }, + }, + } + klass = self._get_target_class() + param = klass.from_api_repr(RESOURCE) + self.assertEqual( + param, + self._make_one( + 'foo', + _make_subparam('bar', 'STRING', 'abc'), + ArrayQueryParameter('baz', 'INT64', [123, 456]))) + + def test_from_api_repr_w_nested_struct(self): + RESOURCE = { + 'name': 'foo', + 'parameterType': { + 'type': 'STRUCT', + 'structTypes': [ + {'name': 'bar', 'type': {'type': 'STRING'}}, + {'name': 'baz', 'type': { + 'type': 'STRUCT', + 'structTypes': [ + {'name': 'qux', 'type': {'type': 'INT64'}}, + {'name': 'spam', 'type': {'type': 'BOOL'}}, + ], + }}, + ], + }, + 'parameterValue': { + 'structValues': { + 'bar': {'value': 'abc'}, + 'baz': {'structValues': { + 'qux': {'value': '123'}, + 'spam': {'value': 'true'}, + }}, + }, + }, + } + + klass = self._get_target_class() + param = klass.from_api_repr(RESOURCE) + + expected = self._make_one( + 'foo', + _make_subparam('bar', 'STRING', 'abc'), + self._make_one( + 'baz', + _make_subparam('qux', 'INT64', 123), + _make_subparam('spam', 'BOOL', True))) + self.assertEqual(param.name, 'foo') + self.assertEqual(param.struct_types, expected.struct_types) + self.assertEqual(param.struct_values, expected.struct_values) + + def test_to_api_repr_w_name(self): + EXPECTED = { + 'name': 'foo', + 'parameterType': { + 'type': 'STRUCT', + 'structTypes': [ + {'name': 'bar', 'type': {'type': 'INT64'}}, + {'name': 'baz', 'type': {'type': 'STRING'}}, + ], + }, + 'parameterValue': { + 'structValues': { + 'bar': {'value': '123'}, + 'baz': {'value': 'abc'}, + }, + }, + } + sub_1 = _make_subparam('bar', 'INT64', 123) + sub_2 = _make_subparam('baz', 'STRING', 'abc') + param = self._make_one('foo', sub_1, sub_2) + self.assertEqual(param.to_api_repr(), EXPECTED) + + def test_to_api_repr_wo_name(self): + EXPECTED = { + 'parameterType': { + 'type': 'STRUCT', + 'structTypes': [ + {'name': 'bar', 'type': {'type': 'INT64'}}, + {'name': 'baz', 'type': 
{'type': 'STRING'}}, + ], + }, + 'parameterValue': { + 'structValues': { + 'bar': {'value': '123'}, + 'baz': {'value': 'abc'}, + }, + }, + } + sub_1 = _make_subparam('bar', 'INT64', 123) + sub_2 = _make_subparam('baz', 'STRING', 'abc') + klass = self._get_target_class() + param = klass.positional(sub_1, sub_2) + self.assertEqual(param.to_api_repr(), EXPECTED) + + def test_to_api_repr_w_nested_array(self): + from google.cloud.bigquery.query import ArrayQueryParameter + + EXPECTED = { + 'name': 'foo', + 'parameterType': { + 'type': 'STRUCT', + 'structTypes': [ + {'name': 'bar', 'type': {'type': 'STRING'}}, + {'name': 'baz', 'type': { + 'type': 'ARRAY', + 'arrayType': {'type': 'INT64'}, + }}, + ], + }, + 'parameterValue': { + 'structValues': { + 'bar': {'value': 'abc'}, + 'baz': {'arrayValues': [ + {'value': '123'}, + {'value': '456'}, + ]}, + }, + }, + } + scalar = _make_subparam('bar', 'STRING', 'abc') + array = ArrayQueryParameter('baz', 'INT64', [123, 456]) + param = self._make_one('foo', scalar, array) + self.assertEqual(param.to_api_repr(), EXPECTED) + + def test_to_api_repr_w_nested_struct(self): + EXPECTED = { + 'name': 'foo', + 'parameterType': { + 'type': 'STRUCT', + 'structTypes': [ + {'name': 'bar', 'type': {'type': 'STRING'}}, + {'name': 'baz', 'type': { + 'type': 'STRUCT', + 'structTypes': [ + {'name': 'qux', 'type': {'type': 'INT64'}}, + {'name': 'spam', 'type': {'type': 'BOOL'}}, + ], + }}, + ], + }, + 'parameterValue': { + 'structValues': { + 'bar': {'value': 'abc'}, + 'baz': {'structValues': { + 'qux': {'value': '123'}, + 'spam': {'value': 'true'}, + }}, + }, + }, + } + scalar_1 = _make_subparam('bar', 'STRING', 'abc') + scalar_2 = _make_subparam('qux', 'INT64', 123) + scalar_3 = _make_subparam('spam', 'BOOL', True) + sub = self._make_one('baz', scalar_2, scalar_3) + param = self._make_one('foo', scalar_1, sub) + self.assertEqual(param.to_api_repr(), EXPECTED) + + def test___eq___wrong_type(self): + field = self._make_one( + 'test', _make_subparam('bar', 'STRING', 'abc')) + other = object() + self.assertNotEqual(field, other) + self.assertEqual(field, mock.ANY) + + def test___eq___name_mismatch(self): + field = self._make_one( + 'test', _make_subparam('bar', 'STRING', 'abc')) + other = self._make_one( + 'other ', _make_subparam('bar', 'STRING', 'abc')) + self.assertNotEqual(field, other) + + def test___eq___field_type_mismatch(self): + field = self._make_one( + 'test', _make_subparam('bar', 'STRING', None)) + other = self._make_one( + 'test', _make_subparam('bar', 'INT64', None)) + self.assertNotEqual(field, other) + + def test___eq___value_mismatch(self): + field = self._make_one( + 'test', _make_subparam('bar', 'STRING', 'hello')) + other = self._make_one( + 'test', _make_subparam('bar', 'STRING', 'world')) + self.assertNotEqual(field, other) + + def test___eq___hit(self): + field = self._make_one( + 'test', _make_subparam('bar', 'STRING', 'gotcha')) + other = self._make_one( + 'test', _make_subparam('bar', 'STRING', 'gotcha')) + self.assertEqual(field, other) + + def test___ne___wrong_type(self): + field = self._make_one( + 'test', _make_subparam('bar', 'STRING', 'hello')) + other = object() + self.assertNotEqual(field, other) + self.assertEqual(field, mock.ANY) + + def test___ne___same_value(self): + field1 = self._make_one( + 'test', _make_subparam('bar', 'STRING', 'hello')) + field2 = self._make_one( + 'test', _make_subparam('bar', 'STRING', 'hello')) + # unittest ``assertEqual`` uses ``==`` not ``!=``. 
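+ # Evaluating ``!=`` directly below exercises the ``__ne__`` path.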
+ comparison_val = (field1 != field2) + self.assertFalse(comparison_val) + + def test___ne___different_values(self): + field1 = self._make_one( + 'test', _make_subparam('bar', 'STRING', 'hello')) + field2 = self._make_one( + 'test', _make_subparam('bar', 'STRING', 'world')) + self.assertNotEqual(field1, field2) + + def test___repr__(self): + field1 = self._make_one( + 'test', _make_subparam('field1', 'STRING', 'hello')) + got = repr(field1) + self.assertIn('StructQueryParameter', got) + self.assertIn("'field1', 'STRING'", got) + self.assertIn("'field1': 'hello'", got) + class TestQueryResults(unittest.TestCase): PROJECT = 'project' - JOB_NAME = 'job_name' - JOB_NAME = 'test-synchronous-query' - JOB_TYPE = 'query' - QUERY = 'select count(*) from persons' + JOB_ID = 'test-synchronous-query' TOKEN = 'TOKEN' @staticmethod @@ -32,49 +987,14 @@ def _get_target_class(): def _make_one(self, *args, **kw): return self._get_target_class()(*args, **kw) - def _makeResource(self, complete=False): - resource = { + def _makeResource(self): + return { 'jobReference': { 'projectId': self.PROJECT, - 'jobId': self.JOB_NAME, - }, - 'jobComplete': complete, - 'errors': [], - 'schema': { - 'fields': [ - {'name': 'full_name', 'type': 'STRING', 'mode': 'REQURED'}, - {'name': 'age', 'type': 'INTEGER', 'mode': 'REQURED'}, - ], + 'jobId': self.JOB_ID, }, } - if complete: - resource['totalRows'] = '1000' - resource['rows'] = [ - {'f': [ - {'v': 'Phred Phlyntstone'}, - {'v': 32}, - ]}, - {'f': [ - {'v': 'Bharney Rhubble'}, - {'v': 33}, - ]}, - {'f': [ - {'v': 'Wylma Phlyntstone'}, - {'v': 29}, - ]}, - {'f': [ - {'v': 'Bhettye Rhubble'}, - {'v': 27}, - ]}, - ] - resource['pageToken'] = self.TOKEN - resource['totalBytesProcessed'] = 100000 - resource['numDmlAffectedRows'] = 123 - resource['cacheHit'] = False - - return resource - def _verifySchema(self, query, resource): from google.cloud.bigquery.schema import SchemaField @@ -92,668 +1012,242 @@ def _verifySchema(self, query, resource): else: self.assertEqual(query.schema, ()) - def _verifyRows(self, query, resource): - expected = resource.get('rows') - if expected is None: - self.assertEqual(query.rows, []) - else: - found = query.rows - self.assertEqual(len(found), len(expected)) - for f_row, e_row in zip(found, expected): - self.assertEqual(f_row, - tuple([cell['v'] for cell in e_row['f']])) - - def _verify_udf_resources(self, query, resource): - udf_resources = resource.get('userDefinedFunctionResources', ()) - self.assertEqual(len(query.udf_resources), len(udf_resources)) - for found, expected in zip(query.udf_resources, udf_resources): - if 'resourceUri' in expected: - self.assertEqual(found.udf_type, 'resourceUri') - self.assertEqual(found.value, expected['resourceUri']) - else: - self.assertEqual(found.udf_type, 'inlineCode') - self.assertEqual(found.value, expected['inlineCode']) - - def _verifyQueryParameters(self, query, resource): - query_parameters = resource.get('queryParameters', ()) - self.assertEqual(len(query.query_parameters), len(query_parameters)) - for found, expected in zip(query.query_parameters, query_parameters): - self.assertEqual(found.to_api_repr(), expected) - - def _verifyResourceProperties(self, query, resource): - self.assertEqual(query.cache_hit, resource.get('cacheHit')) - self.assertEqual(query.complete, resource.get('jobComplete')) - self.assertEqual(query.errors, resource.get('errors')) - self.assertEqual(query.page_token, resource.get('pageToken')) - - if 'totalRows' in resource: - self.assertEqual(query.total_rows, 
int(resource['totalRows'])) - else: - self.assertIsNone(query.total_rows) - - if 'totalBytesProcessed' in resource: - self.assertEqual(query.total_bytes_processed, - int(resource['totalBytesProcessed'])) - else: - self.assertIsNone(query.total_bytes_processed) - - if 'jobReference' in resource: - self.assertEqual(query.name, resource['jobReference']['jobId']) - else: - self.assertIsNone(query.name) - - if 'numDmlAffectedRows' in resource: - self.assertEqual(query.num_dml_affected_rows, - int(resource['numDmlAffectedRows'])) - else: - self.assertIsNone(query.num_dml_affected_rows) - - self._verify_udf_resources(query, resource) - self._verifyQueryParameters(query, resource) - self._verifySchema(query, resource) - self._verifyRows(query, resource) - def test_ctor_defaults(self): - client = _Client(self.PROJECT) - query = self._make_one(self.QUERY, client) - self.assertEqual(query.query, self.QUERY) - self.assertIs(query._client, client) - + query = self._make_one(self._makeResource()) self.assertIsNone(query.cache_hit) self.assertIsNone(query.complete) self.assertIsNone(query.errors) - self.assertIsNone(query.name) self.assertIsNone(query.page_token) - self.assertEqual(query.query_parameters, []) + self.assertEqual(query.project, self.PROJECT) self.assertEqual(query.rows, []) self.assertEqual(query.schema, ()) self.assertIsNone(query.total_rows) self.assertIsNone(query.total_bytes_processed) - self.assertEqual(query.udf_resources, []) - - self.assertIsNone(query.default_dataset) - self.assertIsNone(query.max_results) - self.assertIsNone(query.preserve_nulls) - self.assertIsNone(query.use_query_cache) - self.assertIsNone(query.use_legacy_sql) - - def test_ctor_w_udf_resources(self): - from google.cloud.bigquery._helpers import UDFResource - - RESOURCE_URI = 'gs://some-bucket/js/lib.js' - udf_resources = [UDFResource("resourceUri", RESOURCE_URI)] - client = _Client(self.PROJECT) - query = self._make_one(self.QUERY, client, udf_resources=udf_resources) - self.assertEqual(query.udf_resources, udf_resources) - - def test_ctor_w_query_parameters(self): - from google.cloud.bigquery._helpers import ScalarQueryParameter - - query_parameters = [ScalarQueryParameter("foo", 'INT64', 123)] - client = _Client(self.PROJECT) - query = self._make_one(self.QUERY, client, - query_parameters=query_parameters) - self.assertEqual(query.query_parameters, query_parameters) - - def test_from_query_job(self): - from google.cloud.bigquery.dataset import Dataset - from google.cloud.bigquery.job import QueryJob - from google.cloud.bigquery._helpers import UDFResource - - DS_NAME = 'DATASET' - RESOURCE_URI = 'gs://some-bucket/js/lib.js' - client = _Client(self.PROJECT) - job = QueryJob( - self.JOB_NAME, self.QUERY, client, - udf_resources=[UDFResource("resourceUri", RESOURCE_URI)]) - dataset = job.default_dataset = Dataset(DS_NAME, client) - job.use_query_cache = True - job.use_legacy_sql = True - klass = self._get_target_class() - - query = klass.from_query_job(job) - - self.assertEqual(query.name, self.JOB_NAME) - self.assertEqual(query.query, self.QUERY) - self.assertIs(query._client, client) - self.assertIs(query._job, job) - self.assertEqual(query.udf_resources, job.udf_resources) - self.assertIs(query.default_dataset, dataset) - self.assertTrue(query.use_query_cache) - self.assertTrue(query.use_legacy_sql) - - def test_from_query_job_wo_default_dataset(self): - from google.cloud.bigquery.job import QueryJob - from google.cloud.bigquery._helpers import UDFResource - - RESOURCE_URI = 'gs://some-bucket/js/lib.js' - 
client = _Client(self.PROJECT) - job = QueryJob( - self.JOB_NAME, self.QUERY, client, - udf_resources=[UDFResource("resourceUri", RESOURCE_URI)]) - klass = self._get_target_class() - - query = klass.from_query_job(job) - - self.assertEqual(query.query, self.QUERY) - self.assertIs(query._client, client) - self.assertIs(query._job, job) - self.assertEqual(query.udf_resources, job.udf_resources) - self.assertIsNone(query.default_dataset) - self.assertIsNone(query.use_query_cache) - self.assertIsNone(query.use_legacy_sql) - - def test_job_wo_jobid(self): - client = _Client(self.PROJECT) - query = self._make_one(self.QUERY, client) - self.assertIsNone(query.job) - - def test_job_w_jobid(self): - from google.cloud.bigquery.job import QueryJob - - SERVER_GENERATED = 'SERVER_GENERATED' - client = _Client(self.PROJECT) - query = self._make_one(self.QUERY, client) - query._properties['jobReference'] = { - 'projectId': self.PROJECT, - 'jobId': SERVER_GENERATED, - } - job = query.job - self.assertIsInstance(job, QueryJob) - self.assertEqual(job.query, self.QUERY) - self.assertIs(job._client, client) - self.assertEqual(job.name, SERVER_GENERATED) - fetched_later = query.job - self.assertIs(fetched_later, job) def test_cache_hit_missing(self): - client = _Client(self.PROJECT) - query = self._make_one(self.QUERY, client) + query = self._make_one(self._makeResource()) self.assertIsNone(query.cache_hit) def test_cache_hit_present(self): - client = _Client(self.PROJECT) - query = self._make_one(self.QUERY, client) - resource = {'cacheHit': True} - query._set_properties(resource) + resource = self._makeResource() + resource['cacheHit'] = True + query = self._make_one(resource) self.assertTrue(query.cache_hit) def test_complete_missing(self): - client = _Client(self.PROJECT) - query = self._make_one(self.QUERY, client) + query = self._make_one(self._makeResource()) self.assertIsNone(query.complete) def test_complete_present(self): - client = _Client(self.PROJECT) - query = self._make_one(self.QUERY, client) - resource = {'jobComplete': True} - query._set_properties(resource) + resource = self._makeResource() + resource['jobComplete'] = True + query = self._make_one(resource) self.assertTrue(query.complete) def test_errors_missing(self): - client = _Client(self.PROJECT) - query = self._make_one(self.QUERY, client) + query = self._make_one(self._makeResource()) self.assertIsNone(query.errors) def test_errors_present(self): ERRORS = [ {'reason': 'testing'}, ] - client = _Client(self.PROJECT) - query = self._make_one(self.QUERY, client) - resource = {'errors': ERRORS} - query._set_properties(resource) + resource = self._makeResource() + resource['errors'] = ERRORS + query = self._make_one(resource) self.assertEqual(query.errors, ERRORS) - def test_name_missing(self): - client = _Client(self.PROJECT) - query = self._make_one(self.QUERY, client) - self.assertIsNone(query.name) + def test_job_id_missing(self): + with self.assertRaises(ValueError): + self._make_one({}) - def test_name_broken_job_reference(self): - client = _Client(self.PROJECT) - query = self._make_one(self.QUERY, client) + def test_job_id_broken_job_reference(self): resource = {'jobReference': {'bogus': 'BOGUS'}} - query._set_properties(resource) - self.assertIsNone(query.name) + with self.assertRaises(ValueError): + self._make_one(resource) - def test_name_present(self): - JOB_ID = 'JOB_ID' - client = _Client(self.PROJECT) - query = self._make_one(self.QUERY, client) - resource = {'jobReference': {'jobId': JOB_ID}} - query._set_properties(resource) 
- self.assertEqual(query.name, JOB_ID) + def test_job_id_present(self): + resource = self._makeResource() + resource['jobReference']['jobId'] = 'custom-job' + query = self._make_one(resource) + self.assertEqual(query.job_id, 'custom-job') def test_page_token_missing(self): - client = _Client(self.PROJECT) - query = self._make_one(self.QUERY, client) + query = self._make_one(self._makeResource()) self.assertIsNone(query.page_token) def test_page_token_present(self): - TOKEN = 'TOKEN' - client = _Client(self.PROJECT) - query = self._make_one(self.QUERY, client) - resource = {'pageToken': TOKEN} - query._set_properties(resource) - self.assertEqual(query.page_token, TOKEN) - - def test_total_rows_missing(self): - client = _Client(self.PROJECT) - query = self._make_one(self.QUERY, client) - self.assertIsNone(query.total_rows) + resource = self._makeResource() + resource['pageToken'] = 'TOKEN' + query = self._make_one(resource) + self.assertEqual(query.page_token, 'TOKEN') def test_total_rows_present_integer(self): - TOTAL_ROWS = 42 - client = _Client(self.PROJECT) - query = self._make_one(self.QUERY, client) - resource = {'totalRows': TOTAL_ROWS} - query._set_properties(resource) - self.assertEqual(query.total_rows, TOTAL_ROWS) + resource = self._makeResource() + resource['totalRows'] = 42 + query = self._make_one(resource) + self.assertEqual(query.total_rows, 42) def test_total_rows_present_string(self): - TOTAL_ROWS = 42 - client = _Client(self.PROJECT) - query = self._make_one(self.QUERY, client) - resource = {'totalRows': str(TOTAL_ROWS)} - query._set_properties(resource) - self.assertEqual(query.total_rows, TOTAL_ROWS) + resource = self._makeResource() + resource['totalRows'] = '42' + query = self._make_one(resource) + self.assertEqual(query.total_rows, 42) def test_total_bytes_processed_missing(self): - client = _Client(self.PROJECT) - query = self._make_one(self.QUERY, client) + query = self._make_one(self._makeResource()) self.assertIsNone(query.total_bytes_processed) def test_total_bytes_processed_present_integer(self): - TOTAL_BYTES_PROCESSED = 123456 - client = _Client(self.PROJECT) - query = self._make_one(self.QUERY, client) - resource = {'totalBytesProcessed': TOTAL_BYTES_PROCESSED} - query._set_properties(resource) - self.assertEqual(query.total_bytes_processed, TOTAL_BYTES_PROCESSED) + resource = self._makeResource() + resource['totalBytesProcessed'] = 123456 + query = self._make_one(resource) + self.assertEqual(query.total_bytes_processed, 123456) def test_total_bytes_processed_present_string(self): - TOTAL_BYTES_PROCESSED = 123456 - client = _Client(self.PROJECT) - query = self._make_one(self.QUERY, client) - resource = {'totalBytesProcessed': str(TOTAL_BYTES_PROCESSED)} - query._set_properties(resource) - self.assertEqual(query.total_bytes_processed, TOTAL_BYTES_PROCESSED) + resource = self._makeResource() + resource['totalBytesProcessed'] = '123456' + query = self._make_one(resource) + self.assertEqual(query.total_bytes_processed, 123456) def test_num_dml_affected_rows_missing(self): - client = _Client(self.PROJECT) - query = self._make_one(self.QUERY, client) + query = self._make_one(self._makeResource()) self.assertIsNone(query.num_dml_affected_rows) def test_num_dml_affected_rows_present_integer(self): - DML_AFFECTED_ROWS = 123456 - client = _Client(self.PROJECT) - query = self._make_one(self.QUERY, client) - resource = {'numDmlAffectedRows': DML_AFFECTED_ROWS} - query._set_properties(resource) - self.assertEqual(query.num_dml_affected_rows, DML_AFFECTED_ROWS) + resource 
= self._makeResource() + resource['numDmlAffectedRows'] = 123456 + query = self._make_one(resource) + self.assertEqual(query.num_dml_affected_rows, 123456) def test_num_dml_affected_rows_present_string(self): - DML_AFFECTED_ROWS = 123456 - client = _Client(self.PROJECT) - query = self._make_one(self.QUERY, client) - resource = {'numDmlAffectedRows': str(DML_AFFECTED_ROWS)} - query._set_properties(resource) - self.assertEqual(query.num_dml_affected_rows, DML_AFFECTED_ROWS) + resource = self._makeResource() + resource['numDmlAffectedRows'] = '123456' + query = self._make_one(resource) + self.assertEqual(query.num_dml_affected_rows, 123456) def test_schema(self): - client = _Client(self.PROJECT) - query = self._make_one(self.QUERY, client) - self._verifyResourceProperties(query, {}) - resource = { - 'schema': { - 'fields': [ - {'name': 'full_name', 'type': 'STRING', 'mode': 'REQURED'}, - {'name': 'age', 'type': 'INTEGER', 'mode': 'REQURED'}, - ], - }, + query = self._make_one(self._makeResource()) + self._verifySchema(query, self._makeResource()) + resource = self._makeResource() + resource['schema'] = { + 'fields': [ + {'name': 'full_name', 'type': 'STRING', 'mode': 'REQURED'}, + {'name': 'age', 'type': 'INTEGER', 'mode': 'REQURED'}, + ], } query._set_properties(resource) - self._verifyResourceProperties(query, resource) + self._verifySchema(query, resource) - def test_run_w_already_has_job(self): - conn = _Connection() - client = _Client(project=self.PROJECT, connection=conn) - query = self._make_one(self.QUERY, client) - query._job = object() # simulate already running - with self.assertRaises(ValueError): - query.run() - - def test_run_w_already_has_job_in_properties(self): - JOB_ID = 'JOB_ID' - conn = _Connection() - client = _Client(project=self.PROJECT, connection=conn) - query = self._make_one(self.QUERY, client) - query._properties['jobReference'] = {'jobId': JOB_ID} - with self.assertRaises(ValueError): - query.run() - - def test_run_w_bound_client(self): - PATH = 'projects/%s/queries' % self.PROJECT - RESOURCE = self._makeResource(complete=False) - conn = _Connection(RESOURCE) - client = _Client(project=self.PROJECT, connection=conn) - query = self._make_one(self.QUERY, client) - self.assertEqual(query.udf_resources, []) - query.run() - - self.assertEqual(len(conn._requested), 1) - req = conn._requested[0] - self.assertEqual(req['method'], 'POST') - self.assertEqual(req['path'], '/%s' % PATH) - SENT = {'query': self.QUERY} - self.assertEqual(req['data'], SENT) - self._verifyResourceProperties(query, RESOURCE) - - def test_run_w_alternate_client(self): - PATH = 'projects/%s/queries' % self.PROJECT - RESOURCE = self._makeResource(complete=True) - DATASET = 'test_dataset' - conn1 = _Connection() - client1 = _Client(project=self.PROJECT, connection=conn1) - conn2 = _Connection(RESOURCE) - client2 = _Client(project=self.PROJECT, connection=conn2) - query = self._make_one(self.QUERY, client1) - - query.default_dataset = client2.dataset(DATASET) - query.max_results = 100 - query.preserve_nulls = True - query.timeout_ms = 20000 - query.use_query_cache = False - query.use_legacy_sql = True - query.dry_run = True - - query.run(client=client2) - - self.assertEqual(len(conn1._requested), 0) - self.assertEqual(len(conn2._requested), 1) - req = conn2._requested[0] - self.assertEqual(req['method'], 'POST') - self.assertEqual(req['path'], '/%s' % PATH) - SENT = { - 'query': self.QUERY, - 'defaultDataset': { - 'projectId': self.PROJECT, - 'datasetId': DATASET, - }, - 'dryRun': True, - 'maxResults': 
100, - 'preserveNulls': True, - 'timeoutMs': 20000, - 'useQueryCache': False, - 'useLegacySql': True, - } - self.assertEqual(req['data'], SENT) - self._verifyResourceProperties(query, RESOURCE) - - def test_run_w_inline_udf(self): - from google.cloud.bigquery._helpers import UDFResource - - INLINE_UDF_CODE = 'var someCode = "here";' - PATH = 'projects/%s/queries' % self.PROJECT - RESOURCE = self._makeResource(complete=False) - RESOURCE['userDefinedFunctionResources'] = [ - {'inlineCode': INLINE_UDF_CODE}, - ] - conn = _Connection(RESOURCE) - client = _Client(project=self.PROJECT, connection=conn) - query = self._make_one(self.QUERY, client) - query.udf_resources = [UDFResource("inlineCode", INLINE_UDF_CODE)] - - query.run() - - self.assertEqual(len(conn._requested), 1) - req = conn._requested[0] - self.assertEqual(req['method'], 'POST') - self.assertEqual(req['path'], '/%s' % PATH) - SENT = {'query': self.QUERY, - 'userDefinedFunctionResources': - [{'inlineCode': INLINE_UDF_CODE}]} - self.assertEqual(req['data'], SENT) - self._verifyResourceProperties(query, RESOURCE) - - def test_run_w_udf_resource_uri(self): - from google.cloud.bigquery._helpers import UDFResource - - RESOURCE_URI = 'gs://some-bucket/js/lib.js' - PATH = 'projects/%s/queries' % self.PROJECT - RESOURCE = self._makeResource(complete=False) - RESOURCE['userDefinedFunctionResources'] = [ - {'resourceUri': RESOURCE_URI}, - ] - conn = _Connection(RESOURCE) - client = _Client(project=self.PROJECT, connection=conn) - query = self._make_one(self.QUERY, client) - query.udf_resources = [UDFResource("resourceUri", RESOURCE_URI)] - - query.run() - - self.assertEqual(len(conn._requested), 1) - req = conn._requested[0] - self.assertEqual(req['method'], 'POST') - self.assertEqual(req['path'], '/%s' % PATH) - SENT = {'query': self.QUERY, - 'userDefinedFunctionResources': - [{'resourceUri': RESOURCE_URI}]} - self.assertEqual(req['data'], SENT) - self._verifyResourceProperties(query, RESOURCE) - - def test_run_w_mixed_udfs(self): - from google.cloud.bigquery._helpers import UDFResource - - RESOURCE_URI = 'gs://some-bucket/js/lib.js' - INLINE_UDF_CODE = 'var someCode = "here";' - PATH = 'projects/%s/queries' % self.PROJECT - RESOURCE = self._makeResource(complete=False) - RESOURCE['userDefinedFunctionResources'] = [ - {'resourceUri': RESOURCE_URI}, - {'inlineCode': INLINE_UDF_CODE}, - ] - conn = _Connection(RESOURCE) - client = _Client(project=self.PROJECT, connection=conn) - query = self._make_one(self.QUERY, client) - query.udf_resources = [UDFResource("resourceUri", RESOURCE_URI), - UDFResource("inlineCode", INLINE_UDF_CODE)] - - query.run() - - self.assertEqual(len(conn._requested), 1) - req = conn._requested[0] - self.assertEqual(req['method'], 'POST') - self.assertEqual(req['path'], '/%s' % PATH) - self.assertEqual(query.udf_resources, - [UDFResource("resourceUri", RESOURCE_URI), - UDFResource("inlineCode", INLINE_UDF_CODE)]) - SENT = {'query': self.QUERY, - 'userDefinedFunctionResources': [ - {'resourceUri': RESOURCE_URI}, - {"inlineCode": INLINE_UDF_CODE}]} - self.assertEqual(req['data'], SENT) - self._verifyResourceProperties(query, RESOURCE) - - def test_run_w_named_query_parameter(self): - from google.cloud.bigquery._helpers import ScalarQueryParameter - - PATH = 'projects/%s/queries' % self.PROJECT - RESOURCE = self._makeResource(complete=False) - RESOURCE['parameterMode'] = 'NAMED' - RESOURCE['queryParameters'] = [ - { - 'name': 'foo', - 'parameterType': { - 'type': 'INT64', - }, - 'parameterValue': { - 'value': '123', - }, - }, 
- ] - query_parameters = [ScalarQueryParameter('foo', 'INT64', 123)] - conn = _Connection(RESOURCE) - client = _Client(project=self.PROJECT, connection=conn) - query = self._make_one(self.QUERY, client, - query_parameters=query_parameters) - query.run() - - self.assertEqual(len(conn._requested), 1) - req = conn._requested[0] - self.assertEqual(req['method'], 'POST') - self.assertEqual(req['path'], '/%s' % PATH) - SENT = { - 'query': self.QUERY, - 'parameterMode': 'NAMED', - 'queryParameters': RESOURCE['queryParameters'], - } - self.assertEqual(req['data'], SENT) - self._verifyResourceProperties(query, RESOURCE) - - def test_run_w_positional_query_parameter(self): - from google.cloud.bigquery._helpers import ScalarQueryParameter - - PATH = 'projects/%s/queries' % self.PROJECT - RESOURCE = self._makeResource(complete=False) - RESOURCE['parameterMode'] = 'POSITIONAL' - RESOURCE['queryParameters'] = [ - { - 'parameterType': { - 'type': 'INT64', - }, - 'parameterValue': { - 'value': '123', - }, - }, - ] - query_parameters = [ScalarQueryParameter.positional('INT64', 123)] - conn = _Connection(RESOURCE) - client = _Client(project=self.PROJECT, connection=conn) - query = self._make_one(self.QUERY, client, - query_parameters=query_parameters) - query.run() - - self.assertEqual(len(conn._requested), 1) - req = conn._requested[0] - self.assertEqual(req['method'], 'POST') - self.assertEqual(req['path'], '/%s' % PATH) - SENT = { - 'query': self.QUERY, - 'parameterMode': 'POSITIONAL', - 'queryParameters': RESOURCE['queryParameters'], - } - self.assertEqual(req['data'], SENT) - self._verifyResourceProperties(query, RESOURCE) - - def test_fetch_data_query_not_yet_run(self): - conn = _Connection() - client = _Client(project=self.PROJECT, connection=conn) - query = self._make_one(self.QUERY, client) - self.assertRaises(ValueError, query.fetch_data) - - def test_fetch_data_w_bound_client(self): - import six - - PATH = 'projects/%s/queries/%s' % (self.PROJECT, self.JOB_NAME) - BEFORE = self._makeResource(complete=False) - AFTER = self._makeResource(complete=True) - del AFTER['totalRows'] - - conn = _Connection(AFTER) - client = _Client(project=self.PROJECT, connection=conn) - query = self._make_one(self.QUERY, client) - query._set_properties(BEFORE) - self.assertFalse(query.complete) - - iterator = query.fetch_data() - page = six.next(iterator.pages) - rows = list(page) - total_rows = iterator.total_rows - page_token = iterator.next_page_token - self.assertTrue(query.complete) - self.assertEqual(len(rows), 4) - self.assertEqual(rows[0], ('Phred Phlyntstone', 32)) - self.assertEqual(rows[1], ('Bharney Rhubble', 33)) - self.assertEqual(rows[2], ('Wylma Phlyntstone', 29)) - self.assertEqual(rows[3], ('Bhettye Rhubble', 27)) - self.assertIsNone(total_rows) - self.assertEqual(page_token, AFTER['pageToken']) - - self.assertEqual(len(conn._requested), 1) - req = conn._requested[0] - self.assertEqual(req['method'], 'GET') - self.assertEqual(req['path'], '/%s' % PATH) - - def test_fetch_data_w_alternate_client(self): - import six - - PATH = 'projects/%s/queries/%s' % (self.PROJECT, self.JOB_NAME) - MAX = 10 - TOKEN = 'TOKEN' - START = 2257 - TIMEOUT = 20000 - BEFORE = self._makeResource(complete=False) - AFTER = self._makeResource(complete=True) - - conn1 = _Connection() - client1 = _Client(project=self.PROJECT, connection=conn1) - conn2 = _Connection(AFTER) - client2 = _Client(project=self.PROJECT, connection=conn2) - query = self._make_one(self.QUERY, client1) - query._set_properties(BEFORE) - 
self.assertFalse(query.complete) - - iterator = query.fetch_data( - client=client2, max_results=MAX, page_token=TOKEN, - start_index=START, timeout_ms=TIMEOUT) - page = six.next(iterator.pages) - rows = list(page) - total_rows = iterator.total_rows - page_token = iterator.next_page_token +class Test__query_param_from_api_repr(unittest.TestCase): - self.assertTrue(query.complete) - self.assertEqual(len(rows), 4) - self.assertEqual(rows[0], ('Phred Phlyntstone', 32)) - self.assertEqual(rows[1], ('Bharney Rhubble', 33)) - self.assertEqual(rows[2], ('Wylma Phlyntstone', 29)) - self.assertEqual(rows[3], ('Bhettye Rhubble', 27)) - self.assertEqual(total_rows, int(AFTER['totalRows'])) - self.assertEqual(page_token, AFTER['pageToken']) + @staticmethod + def _call_fut(resource): + from google.cloud.bigquery.query import _query_param_from_api_repr + + return _query_param_from_api_repr(resource) - self.assertEqual(len(conn1._requested), 0) - self.assertEqual(len(conn2._requested), 1) - req = conn2._requested[0] - self.assertEqual(req['method'], 'GET') - self.assertEqual(req['path'], '/%s' % PATH) - self.assertEqual(req['query_params'], - {'maxResults': MAX, - 'pageToken': TOKEN, - 'startIndex': START, - 'timeoutMs': TIMEOUT}) + def test_w_scalar(self): + from google.cloud.bigquery.query import ScalarQueryParameter + RESOURCE = { + 'name': 'foo', + 'parameterType': {'type': 'INT64'}, + 'parameterValue': {'value': '123'}, + } -class _Client(object): + parameter = self._call_fut(RESOURCE) - def __init__(self, project='project', connection=None): - self.project = project - self._connection = connection + self.assertIsInstance(parameter, ScalarQueryParameter) + self.assertEqual(parameter.name, 'foo') + self.assertEqual(parameter.type_, 'INT64') + self.assertEqual(parameter.value, 123) - def dataset(self, name): - from google.cloud.bigquery.dataset import Dataset + def test_w_scalar_timestamp(self): + from google.cloud._helpers import UTC + from google.cloud.bigquery.query import ScalarQueryParameter - return Dataset(name, client=self) + RESOURCE = { + 'name': 'zoned', + 'parameterType': {'type': 'TIMESTAMP'}, + 'parameterValue': {'value': '2012-03-04 05:06:07+00:00'}, + } + parameter = self._call_fut(RESOURCE) -class _Connection(object): + self.assertIsInstance(parameter, ScalarQueryParameter) + self.assertEqual(parameter.name, 'zoned') + self.assertEqual(parameter.type_, 'TIMESTAMP') + self.assertEqual( + parameter.value, + datetime.datetime(2012, 3, 4, 5, 6, 7, tzinfo=UTC)) + + def test_w_scalar_timestamp_micros(self): + from google.cloud._helpers import UTC + from google.cloud.bigquery.query import ScalarQueryParameter + + RESOURCE = { + 'name': 'zoned', + 'parameterType': {'type': 'TIMESTAMP'}, + 'parameterValue': {'value': '2012-03-04 05:06:07.250000+00:00'}, + } + + parameter = self._call_fut(RESOURCE) + + self.assertIsInstance(parameter, ScalarQueryParameter) + self.assertEqual(parameter.name, 'zoned') + self.assertEqual(parameter.type_, 'TIMESTAMP') + self.assertEqual( + parameter.value, + datetime.datetime(2012, 3, 4, 5, 6, 7, 250000, tzinfo=UTC)) + + def test_w_array(self): + from google.cloud.bigquery.query import ArrayQueryParameter + + RESOURCE = { + 'name': 'foo', + 'parameterType': { + 'type': 'ARRAY', + 'arrayType': {'type': 'INT64'}, + }, + 'parameterValue': { + 'arrayValues': [ + {'value': '123'}, + ]}, + } + + parameter = self._call_fut(RESOURCE) + + self.assertIsInstance(parameter, ArrayQueryParameter) + self.assertEqual(parameter.name, 'foo') + self.assertEqual(parameter.array_type, 
'INT64') + self.assertEqual(parameter.values, [123]) + + def test_w_struct(self): + from google.cloud.bigquery.query import StructQueryParameter + + RESOURCE = { + 'name': 'foo', + 'parameterType': { + 'type': 'STRUCT', + 'structTypes': [ + {'name': 'foo', 'type': {'type': 'STRING'}}, + {'name': 'bar', 'type': {'type': 'INT64'}}, + ], + }, + 'parameterValue': { + 'structValues': { + 'foo': {'value': 'Foo'}, + 'bar': {'value': '123'}, + } + }, + } - def __init__(self, *responses): - self._responses = responses - self._requested = [] + parameter = self._call_fut(RESOURCE) - def api_request(self, **kw): - self._requested.append(kw) - response, self._responses = self._responses[0], self._responses[1:] - return response + self.assertIsInstance(parameter, StructQueryParameter) + self.assertEqual(parameter.name, 'foo') + self.assertEqual( + parameter.struct_types, {'foo': 'STRING', 'bar': 'INT64'}) + self.assertEqual(parameter.struct_values, {'foo': 'Foo', 'bar': 123}) diff --git a/bigquery/tests/unit/test_schema.py b/bigquery/tests/unit/test_schema.py index d08e7757063e..84e5d306c348 100644 --- a/bigquery/tests/unit/test_schema.py +++ b/bigquery/tests/unit/test_schema.py @@ -236,3 +236,132 @@ def test___repr__(self): field1 = self._make_one('field1', 'STRING') expected = "SchemaField('field1', 'string', 'NULLABLE', None, ())" self.assertEqual(repr(field1), expected) + + +# TODO: dedup with the same class in test_table.py. +class _SchemaBase(object): + + def _verify_field(self, field, r_field): + self.assertEqual(field.name, r_field['name']) + self.assertEqual(field.field_type, r_field['type']) + self.assertEqual(field.mode, r_field.get('mode', 'NULLABLE')) + + def _verifySchema(self, schema, resource): + r_fields = resource['schema']['fields'] + self.assertEqual(len(schema), len(r_fields)) + + for field, r_field in zip(schema, r_fields): + self._verify_field(field, r_field) + + +class Test_parse_schema_resource(unittest.TestCase, _SchemaBase): + + def _call_fut(self, resource): + from google.cloud.bigquery.schema import _parse_schema_resource + + return _parse_schema_resource(resource) + + def _makeResource(self): + return { + 'schema': {'fields': [ + {'name': 'full_name', 'type': 'STRING', 'mode': 'REQUIRED'}, + {'name': 'age', 'type': 'INTEGER', 'mode': 'REQUIRED'}, + ]}, + } + + def test__parse_schema_resource_defaults(self): + RESOURCE = self._makeResource() + schema = self._call_fut(RESOURCE['schema']) + self._verifySchema(schema, RESOURCE) + + def test__parse_schema_resource_subfields(self): + RESOURCE = self._makeResource() + RESOURCE['schema']['fields'].append( + {'name': 'phone', + 'type': 'RECORD', + 'mode': 'REPEATED', + 'fields': [{'name': 'type', + 'type': 'STRING', + 'mode': 'REQUIRED'}, + {'name': 'number', + 'type': 'STRING', + 'mode': 'REQUIRED'}]}) + schema = self._call_fut(RESOURCE['schema']) + self._verifySchema(schema, RESOURCE) + + def test__parse_schema_resource_fields_without_mode(self): + RESOURCE = self._makeResource() + RESOURCE['schema']['fields'].append( + {'name': 'phone', + 'type': 'STRING'}) + + schema = self._call_fut(RESOURCE['schema']) + self._verifySchema(schema, RESOURCE) + + +class Test_build_schema_resource(unittest.TestCase, _SchemaBase): + + def _call_fut(self, resource): + from google.cloud.bigquery.schema import _build_schema_resource + + return _build_schema_resource(resource) + + def test_defaults(self): + from google.cloud.bigquery.schema import SchemaField + + full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') + age = SchemaField('age', 
'INTEGER', mode='REQUIRED') + resource = self._call_fut([full_name, age]) + self.assertEqual(len(resource), 2) + self.assertEqual(resource[0], + {'name': 'full_name', + 'type': 'STRING', + 'mode': 'REQUIRED'}) + self.assertEqual(resource[1], + {'name': 'age', + 'type': 'INTEGER', + 'mode': 'REQUIRED'}) + + def test_w_description(self): + from google.cloud.bigquery.schema import SchemaField + + DESCRIPTION = 'DESCRIPTION' + full_name = SchemaField('full_name', 'STRING', mode='REQUIRED', + description=DESCRIPTION) + age = SchemaField('age', 'INTEGER', mode='REQUIRED') + resource = self._call_fut([full_name, age]) + self.assertEqual(len(resource), 2) + self.assertEqual(resource[0], + {'name': 'full_name', + 'type': 'STRING', + 'mode': 'REQUIRED', + 'description': DESCRIPTION}) + self.assertEqual(resource[1], + {'name': 'age', + 'type': 'INTEGER', + 'mode': 'REQUIRED'}) + + def test_w_subfields(self): + from google.cloud.bigquery.schema import SchemaField + + full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') + ph_type = SchemaField('type', 'STRING', 'REQUIRED') + ph_num = SchemaField('number', 'STRING', 'REQUIRED') + phone = SchemaField('phone', 'RECORD', mode='REPEATED', + fields=[ph_type, ph_num]) + resource = self._call_fut([full_name, phone]) + self.assertEqual(len(resource), 2) + self.assertEqual(resource[0], + {'name': 'full_name', + 'type': 'STRING', + 'mode': 'REQUIRED'}) + self.assertEqual(resource[1], + {'name': 'phone', + 'type': 'RECORD', + 'mode': 'REPEATED', + 'fields': [{'name': 'type', + 'type': 'STRING', + 'mode': 'REQUIRED'}, + {'name': 'number', + 'type': 'STRING', + 'mode': 'REQUIRED'}]}) diff --git a/bigquery/tests/unit/test_table.py b/bigquery/tests/unit/test_table.py index aa9e00670655..a40ab160d970 100644 --- a/bigquery/tests/unit/test_table.py +++ b/bigquery/tests/unit/test_table.py @@ -12,14 +12,11 @@ # See the License for the specific language governing permissions and # limitations under the License. 
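The test_table.py changes below introduce a TestTableReference suite covering construction, equality, hashing, and the to_api_repr()/from_api_repr() round-trip. A minimal sketch of building the same reference in user code (the DatasetReference.table() shorthand is an assumption here; the tests themselves call the TableReference constructor directly):

    from google.cloud.bigquery import DatasetReference, TableReference

    dataset_ref = DatasetReference('project_1', 'dataset_1')
    table_ref = TableReference(dataset_ref, 'table_1')

    # Both spellings name the same table and compare (and hash) equal.
    assert table_ref == dataset_ref.table('table_1')
    assert table_ref.to_api_repr() == {
        'projectId': 'project_1',
        'datasetId': 'dataset_1',
        'tableId': 'table_1',
    }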
-import email -import io -import json import unittest import mock -from six.moves import http_client -import pytest + +from google.cloud.bigquery.dataset import DatasetReference class _SchemaBase(object): @@ -37,10 +34,122 @@ def _verifySchema(self, schema, resource): self._verify_field(field, r_field) +class TestTableReference(unittest.TestCase): + + @staticmethod + def _get_target_class(): + from google.cloud.bigquery.table import TableReference + + return TableReference + + def _make_one(self, *args, **kw): + return self._get_target_class()(*args, **kw) + + def test_ctor_defaults(self): + from google.cloud.bigquery.dataset import DatasetReference + dataset_ref = DatasetReference('project_1', 'dataset_1') + + table_ref = self._make_one(dataset_ref, 'table_1') + self.assertEqual(table_ref.dataset_id, dataset_ref.dataset_id) + self.assertEqual(table_ref.table_id, 'table_1') + + def test_to_api_repr(self): + from google.cloud.bigquery.dataset import DatasetReference + dataset_ref = DatasetReference('project_1', 'dataset_1') + table_ref = self._make_one(dataset_ref, 'table_1') + + resource = table_ref.to_api_repr() + + self.assertEqual( + resource, + { + 'projectId': 'project_1', + 'datasetId': 'dataset_1', + 'tableId': 'table_1', + }) + + def test_from_api_repr(self): + from google.cloud.bigquery.dataset import DatasetReference + from google.cloud.bigquery.table import TableReference + dataset_ref = DatasetReference('project_1', 'dataset_1') + expected = self._make_one(dataset_ref, 'table_1') + + got = TableReference.from_api_repr( + { + 'projectId': 'project_1', + 'datasetId': 'dataset_1', + 'tableId': 'table_1', + }) + + self.assertEqual(expected, got) + + def test___eq___wrong_type(self): + from google.cloud.bigquery.dataset import DatasetReference + dataset_ref = DatasetReference('project_1', 'dataset_1') + table = self._make_one(dataset_ref, 'table_1') + other = object() + self.assertNotEqual(table, other) + self.assertEqual(table, mock.ANY) + + def test___eq___project_mismatch(self): + from google.cloud.bigquery.dataset import DatasetReference + dataset = DatasetReference('project_1', 'dataset_1') + other_dataset = DatasetReference('project_2', 'dataset_1') + table = self._make_one(dataset, 'table_1') + other = self._make_one(other_dataset, 'table_1') + self.assertNotEqual(table, other) + + def test___eq___dataset_mismatch(self): + from google.cloud.bigquery.dataset import DatasetReference + dataset = DatasetReference('project_1', 'dataset_1') + other_dataset = DatasetReference('project_1', 'dataset_2') + table = self._make_one(dataset, 'table_1') + other = self._make_one(other_dataset, 'table_1') + self.assertNotEqual(table, other) + + def test___eq___table_mismatch(self): + from google.cloud.bigquery.dataset import DatasetReference + dataset = DatasetReference('project_1', 'dataset_1') + table = self._make_one(dataset, 'table_1') + other = self._make_one(dataset, 'table_2') + self.assertNotEqual(table, other) + + def test___eq___equality(self): + from google.cloud.bigquery.dataset import DatasetReference + dataset = DatasetReference('project_1', 'dataset_1') + table = self._make_one(dataset, 'table_1') + other = self._make_one(dataset, 'table_1') + self.assertEqual(table, other) + + def test___hash__set_equality(self): + from google.cloud.bigquery.dataset import DatasetReference + dataset = DatasetReference('project_1', 'dataset_1') + table1 = self._make_one(dataset, 'table1') + table2 = self._make_one(dataset, 'table2') + set_one = {table1, table2} + set_two = {table1, table2} + 
self.assertEqual(set_one, set_two) + + def test___hash__not_equals(self): + from google.cloud.bigquery.dataset import DatasetReference + dataset = DatasetReference('project_1', 'dataset_1') + table1 = self._make_one(dataset, 'table1') + table2 = self._make_one(dataset, 'table2') + set_one = {table1} + set_two = {table2} + self.assertNotEqual(set_one, set_two) + + def test___repr__(self): + dataset = DatasetReference('project1', 'dataset1') + table1 = self._make_one(dataset, 'table1') + expected = "TableReference('project1', 'dataset1', 'table1')" + self.assertEqual(repr(table1), expected) + + class TestTable(unittest.TestCase, _SchemaBase): PROJECT = 'prahj-ekt' - DS_NAME = 'dataset-name' + DS_ID = 'dataset-name' TABLE_NAME = 'table-name' @staticmethod @@ -60,11 +169,13 @@ def _setUpConstants(self): self.WHEN = datetime.datetime.utcfromtimestamp(self.WHEN_TS).replace( tzinfo=UTC) self.ETAG = 'ETAG' - self.TABLE_ID = '%s:%s:%s' % ( - self.PROJECT, self.DS_NAME, self.TABLE_NAME) + self.TABLE_FULL_ID = '%s:%s:%s' % ( + self.PROJECT, self.DS_ID, self.TABLE_NAME) self.RESOURCE_URL = 'http://example.com/path/to/resource' self.NUM_BYTES = 12345 self.NUM_ROWS = 67 + self.NUM_EST_BYTES = 1234 + self.NUM_EST_ROWS = 23 def _makeResource(self): self._setUpConstants() @@ -72,19 +183,29 @@ def _makeResource(self): 'creationTime': self.WHEN_TS * 1000, 'tableReference': {'projectId': self.PROJECT, - 'datasetId': self.DS_NAME, + 'datasetId': self.DS_ID, 'tableId': self.TABLE_NAME}, 'schema': {'fields': [ {'name': 'full_name', 'type': 'STRING', 'mode': 'REQUIRED'}, {'name': 'age', 'type': 'INTEGER', 'mode': 'REQUIRED'}]}, 'etag': 'ETAG', - 'id': self.TABLE_ID, + 'id': self.TABLE_FULL_ID, 'lastModifiedTime': self.WHEN_TS * 1000, 'location': 'US', 'selfLink': self.RESOURCE_URL, 'numRows': self.NUM_ROWS, 'numBytes': self.NUM_BYTES, 'type': 'TABLE', + 'streamingBuffer': { + 'estimatedRows': str(self.NUM_EST_ROWS), + 'estimatedBytes': str(self.NUM_EST_BYTES), + 'oldestEntryTime': self.WHEN_TS * 1000}, + 'externalDataConfiguration': { + 'sourceFormat': 'CSV', + 'csvOptions': { + 'allowJaggedRows': True, + 'encoding': 'encoding'}}, + 'labels': {'x': 'y'}, } def _verifyReadonlyResourceProperties(self, table, resource): @@ -113,7 +234,17 @@ def _verifyReadonlyResourceProperties(self, table, resource): else: self.assertIsNone(table.self_link) - self.assertEqual(table.table_id, self.TABLE_ID) + if 'streamingBuffer' in resource: + self.assertEqual(table.streaming_buffer.estimated_rows, + self.NUM_EST_ROWS) + self.assertEqual(table.streaming_buffer.estimated_bytes, + self.NUM_EST_BYTES) + self.assertEqual(table.streaming_buffer.oldest_entry_time, + self.WHEN) + else: + self.assertIsNone(table.streaming_buffer) + + self.assertEqual(table.full_table_id, self.TABLE_FULL_ID) self.assertEqual(table.table_type, 'TABLE' if 'view' not in resource else 'VIEW') @@ -134,7 +265,7 @@ def _verifyResourceProperties(self, table, resource): self.assertEqual(table.view_query, resource['view']['query']) self.assertEqual( table.view_use_legacy_sql, - resource['view'].get('useLegacySql')) + resource['view'].get('useLegacySql', True)) else: self.assertIsNone(table.view_query) self.assertIsNone(table.view_use_legacy_sql) @@ -144,18 +275,28 @@ def _verifyResourceProperties(self, table, resource): else: self.assertEqual(table.schema, []) + if 'externalDataConfiguration' in resource: + edc = table.external_data_configuration + self.assertEqual(edc.source_format, 'CSV') + self.assertEqual(edc.options.allow_jagged_rows, True) + + if 'labels' in 
resource: + self.assertEqual(table.labels, {'x': 'y'}) + else: + self.assertEqual(table.labels, {}) + def test_ctor(self): - client = _Client(self.PROJECT) - dataset = _Dataset(client) - table = self._make_one(self.TABLE_NAME, dataset) - self.assertEqual(table.name, self.TABLE_NAME) - self.assertIs(table._dataset, dataset) + dataset = DatasetReference(self.PROJECT, self.DS_ID) + table_ref = dataset.table(self.TABLE_NAME) + table = self._make_one(table_ref) + + self.assertEqual(table.table_id, self.TABLE_NAME) self.assertEqual(table.project, self.PROJECT) - self.assertEqual(table.dataset_name, self.DS_NAME) + self.assertEqual(table.dataset_id, self.DS_ID) self.assertEqual( table.path, '/projects/%s/datasets/%s/tables/%s' % ( - self.PROJECT, self.DS_NAME, self.TABLE_NAME)) + self.PROJECT, self.DS_ID, self.TABLE_NAME)) self.assertEqual(table.schema, []) self.assertIsNone(table.created) @@ -164,31 +305,32 @@ def test_ctor(self): self.assertIsNone(table.num_bytes) self.assertIsNone(table.num_rows) self.assertIsNone(table.self_link) - self.assertIsNone(table.table_id) + self.assertIsNone(table.full_table_id) self.assertIsNone(table.table_type) - self.assertIsNone(table.description) self.assertIsNone(table.expires) self.assertIsNone(table.friendly_name) self.assertIsNone(table.location) self.assertIsNone(table.view_query) self.assertIsNone(table.view_use_legacy_sql) + self.assertIsNone(table.external_data_configuration) + self.assertEquals(table.labels, {}) def test_ctor_w_schema(self): from google.cloud.bigquery.table import SchemaField - client = _Client(self.PROJECT) - dataset = _Dataset(client) + dataset = DatasetReference(self.PROJECT, self.DS_ID) + table_ref = dataset.table(self.TABLE_NAME) full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') age = SchemaField('age', 'INTEGER', mode='REQUIRED') - table = self._make_one(self.TABLE_NAME, dataset, - schema=[full_name, age]) + table = self._make_one(table_ref, schema=[full_name, age]) + self.assertEqual(table.schema, [full_name, age]) def test_num_bytes_getter(self): - client = _Client(self.PROJECT) - dataset = _Dataset(client) - table = self._make_one(self.TABLE_NAME, dataset) + dataset = DatasetReference(self.PROJECT, self.DS_ID) + table_ref = dataset.table(self.TABLE_NAME) + table = self._make_one(table_ref) # Check with no value set. self.assertIsNone(table.num_bytes) @@ -208,9 +350,9 @@ def test_num_bytes_getter(self): getattr(table, 'num_bytes') def test_num_rows_getter(self): - client = _Client(self.PROJECT) - dataset = _Dataset(client) - table = self._make_one(self.TABLE_NAME, dataset) + dataset = DatasetReference(self.PROJECT, self.DS_ID) + table_ref = dataset.table(self.TABLE_NAME) + table = self._make_one(table_ref) # Check with no value set. 
self.assertIsNone(table.num_rows) @@ -230,18 +372,18 @@ def test_num_rows_getter(self): getattr(table, 'num_rows') def test_schema_setter_non_list(self): - client = _Client(self.PROJECT) - dataset = _Dataset(client) - table = self._make_one(self.TABLE_NAME, dataset) + dataset = DatasetReference(self.PROJECT, self.DS_ID) + table_ref = dataset.table(self.TABLE_NAME) + table = self._make_one(table_ref) with self.assertRaises(TypeError): table.schema = object() def test_schema_setter_invalid_field(self): from google.cloud.bigquery.table import SchemaField - client = _Client(self.PROJECT) - dataset = _Dataset(client) - table = self._make_one(self.TABLE_NAME, dataset) + dataset = DatasetReference(self.PROJECT, self.DS_ID) + table_ref = dataset.table(self.TABLE_NAME) + table = self._make_one(table_ref) full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') with self.assertRaises(ValueError): table.schema = [full_name, object()] @@ -249,9 +391,9 @@ def test_schema_setter_invalid_field(self): def test_schema_setter(self): from google.cloud.bigquery.table import SchemaField - client = _Client(self.PROJECT) - dataset = _Dataset(client) - table = self._make_one(self.TABLE_NAME, dataset) + dataset = DatasetReference(self.PROJECT, self.DS_ID) + table_ref = dataset.table(self.TABLE_NAME) + table = self._make_one(table_ref) full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') age = SchemaField('age', 'INTEGER', mode='REQUIRED') table.schema = [full_name, age] @@ -264,20 +406,20 @@ def test_props_set_by_server(self): CREATED = datetime.datetime(2015, 7, 29, 12, 13, 22, tzinfo=UTC) MODIFIED = datetime.datetime(2015, 7, 29, 14, 47, 15, tzinfo=UTC) - TABLE_ID = '%s:%s:%s' % ( - self.PROJECT, self.DS_NAME, self.TABLE_NAME) + TABLE_FULL_ID = '%s:%s:%s' % ( + self.PROJECT, self.DS_ID, self.TABLE_NAME) URL = 'http://example.com/projects/%s/datasets/%s/tables/%s' % ( - self.PROJECT, self.DS_NAME, self.TABLE_NAME) - client = _Client(self.PROJECT) - dataset = _Dataset(client) - table = self._make_one(self.TABLE_NAME, dataset) + self.PROJECT, self.DS_ID, self.TABLE_NAME) + dataset = DatasetReference(self.PROJECT, self.DS_ID) + table_ref = dataset.table(self.TABLE_NAME) + table = self._make_one(table_ref) table._properties['creationTime'] = _millis(CREATED) table._properties['etag'] = 'ETAG' table._properties['lastModifiedTime'] = _millis(MODIFIED) table._properties['numBytes'] = 12345 table._properties['numRows'] = 66 table._properties['selfLink'] = URL - table._properties['id'] = TABLE_ID + table._properties['id'] = TABLE_FULL_ID table._properties['type'] = 'TABLE' self.assertEqual(table.created, CREATED) @@ -286,27 +428,27 @@ def test_props_set_by_server(self): self.assertEqual(table.num_bytes, 12345) self.assertEqual(table.num_rows, 66) self.assertEqual(table.self_link, URL) - self.assertEqual(table.table_id, TABLE_ID) + self.assertEqual(table.full_table_id, TABLE_FULL_ID) self.assertEqual(table.table_type, 'TABLE') def test_description_setter_bad_value(self): - client = _Client(self.PROJECT) - dataset = _Dataset(client) - table = self._make_one(self.TABLE_NAME, dataset) + dataset = DatasetReference(self.PROJECT, self.DS_ID) + table_ref = dataset.table(self.TABLE_NAME) + table = self._make_one(table_ref) with self.assertRaises(ValueError): table.description = 12345 def test_description_setter(self): - client = _Client(self.PROJECT) - dataset = _Dataset(client) - table = self._make_one(self.TABLE_NAME, dataset) + dataset = DatasetReference(self.PROJECT, self.DS_ID) + table_ref = 
dataset.table(self.TABLE_NAME) + table = self._make_one(table_ref) table.description = 'DESCRIPTION' self.assertEqual(table.description, 'DESCRIPTION') def test_expires_setter_bad_value(self): - client = _Client(self.PROJECT) - dataset = _Dataset(client) - table = self._make_one(self.TABLE_NAME, dataset) + dataset = DatasetReference(self.PROJECT, self.DS_ID) + table_ref = dataset.table(self.TABLE_NAME) + table = self._make_one(table_ref) with self.assertRaises(ValueError): table.expires = object() @@ -315,279 +457,165 @@ def test_expires_setter(self): from google.cloud._helpers import UTC WHEN = datetime.datetime(2015, 7, 28, 16, 39, tzinfo=UTC) - client = _Client(self.PROJECT) - dataset = _Dataset(client) - table = self._make_one(self.TABLE_NAME, dataset) + dataset = DatasetReference(self.PROJECT, self.DS_ID) + table_ref = dataset.table(self.TABLE_NAME) + table = self._make_one(table_ref) table.expires = WHEN self.assertEqual(table.expires, WHEN) def test_friendly_name_setter_bad_value(self): - client = _Client(self.PROJECT) - dataset = _Dataset(client) - table = self._make_one(self.TABLE_NAME, dataset) + dataset = DatasetReference(self.PROJECT, self.DS_ID) + table_ref = dataset.table(self.TABLE_NAME) + table = self._make_one(table_ref) with self.assertRaises(ValueError): table.friendly_name = 12345 def test_friendly_name_setter(self): - client = _Client(self.PROJECT) - dataset = _Dataset(client) - table = self._make_one(self.TABLE_NAME, dataset) + dataset = DatasetReference(self.PROJECT, self.DS_ID) + table_ref = dataset.table(self.TABLE_NAME) + table = self._make_one(table_ref) table.friendly_name = 'FRIENDLY' self.assertEqual(table.friendly_name, 'FRIENDLY') def test_location_setter_bad_value(self): - client = _Client(self.PROJECT) - dataset = _Dataset(client) - table = self._make_one(self.TABLE_NAME, dataset) + dataset = DatasetReference(self.PROJECT, self.DS_ID) + table_ref = dataset.table(self.TABLE_NAME) + table = self._make_one(table_ref) with self.assertRaises(ValueError): table.location = 12345 def test_location_setter(self): - client = _Client(self.PROJECT) - dataset = _Dataset(client) - table = self._make_one(self.TABLE_NAME, dataset) + dataset = DatasetReference(self.PROJECT, self.DS_ID) + table_ref = dataset.table(self.TABLE_NAME) + table = self._make_one(table_ref) table.location = 'LOCATION' self.assertEqual(table.location, 'LOCATION') def test_view_query_setter_bad_value(self): - client = _Client(self.PROJECT) - dataset = _Dataset(client) - table = self._make_one(self.TABLE_NAME, dataset) + dataset = DatasetReference(self.PROJECT, self.DS_ID) + table_ref = dataset.table(self.TABLE_NAME) + table = self._make_one(table_ref) with self.assertRaises(ValueError): table.view_query = 12345 def test_view_query_setter(self): - client = _Client(self.PROJECT) - dataset = _Dataset(client) - table = self._make_one(self.TABLE_NAME, dataset) + dataset = DatasetReference(self.PROJECT, self.DS_ID) + table_ref = dataset.table(self.TABLE_NAME) + table = self._make_one(table_ref) table.view_query = 'select * from foo' self.assertEqual(table.view_query, 'select * from foo') + self.assertEqual(table.view_use_legacy_sql, False) + + table.view_use_legacy_sql = True + self.assertEqual(table.view_use_legacy_sql, True) def test_view_query_deleter(self): - client = _Client(self.PROJECT) - dataset = _Dataset(client) - table = self._make_one(self.TABLE_NAME, dataset) + dataset = DatasetReference(self.PROJECT, self.DS_ID) + table_ref = dataset.table(self.TABLE_NAME) + table = self._make_one(table_ref) 
table.view_query = 'select * from foo' del table.view_query self.assertIsNone(table.view_query) + self.assertIsNone(table.view_use_legacy_sql) def test_view_use_legacy_sql_setter_bad_value(self): - client = _Client(self.PROJECT) - dataset = _Dataset(client) - table = self._make_one(self.TABLE_NAME, dataset) + dataset = DatasetReference(self.PROJECT, self.DS_ID) + table_ref = dataset.table(self.TABLE_NAME) + table = self._make_one(table_ref) with self.assertRaises(ValueError): table.view_use_legacy_sql = 12345 def test_view_use_legacy_sql_setter(self): - client = _Client(self.PROJECT) - dataset = _Dataset(client) - table = self._make_one(self.TABLE_NAME, dataset) - table.view_use_legacy_sql = False + dataset = DatasetReference(self.PROJECT, self.DS_ID) + table_ref = dataset.table(self.TABLE_NAME) + table = self._make_one(table_ref) + table.view_use_legacy_sql = True table.view_query = 'select * from foo' - self.assertEqual(table.view_use_legacy_sql, False) + self.assertEqual(table.view_use_legacy_sql, True) self.assertEqual(table.view_query, 'select * from foo') + def test_external_data_configuration_setter_bad_value(self): + dataset = DatasetReference(self.PROJECT, self.DS_ID) + table_ref = dataset.table(self.TABLE_NAME) + table = self._make_one(table_ref) + with self.assertRaises(ValueError): + table.external_data_configuration = 12345 + + def test_labels_setter_bad_value(self): + dataset = DatasetReference(self.PROJECT, self.DS_ID) + table_ref = dataset.table(self.TABLE_NAME) + table = self._make_one(table_ref) + with self.assertRaises(ValueError): + table.labels = 12345 + def test_from_api_repr_missing_identity(self): self._setUpConstants() - client = _Client(self.PROJECT) - dataset = _Dataset(client) RESOURCE = {} klass = self._get_target_class() with self.assertRaises(KeyError): - klass.from_api_repr(RESOURCE, dataset) + klass.from_api_repr(RESOURCE) def test_from_api_repr_bare(self): self._setUpConstants() - client = _Client(self.PROJECT) - dataset = _Dataset(client) RESOURCE = { - 'id': '%s:%s:%s' % (self.PROJECT, self.DS_NAME, self.TABLE_NAME), + 'id': '%s:%s:%s' % (self.PROJECT, self.DS_ID, self.TABLE_NAME), 'tableReference': { 'projectId': self.PROJECT, - 'datasetId': self.DS_NAME, + 'datasetId': self.DS_ID, 'tableId': self.TABLE_NAME, }, 'type': 'TABLE', } klass = self._get_target_class() - table = klass.from_api_repr(RESOURCE, dataset) - self.assertEqual(table.name, self.TABLE_NAME) - self.assertIs(table._dataset, dataset) + table = klass.from_api_repr(RESOURCE) + self.assertEqual(table.table_id, self.TABLE_NAME) self._verifyResourceProperties(table, RESOURCE) def test_from_api_repr_w_properties(self): - client = _Client(self.PROJECT) - dataset = _Dataset(client) - RESOURCE = self._makeResource() - klass = self._get_target_class() - table = klass.from_api_repr(RESOURCE, dataset) - self.assertIs(table._dataset._client, client) - self._verifyResourceProperties(table, RESOURCE) - - def test_create_new_day_partitioned_table(self): - PATH = 'projects/%s/datasets/%s/tables' % (self.PROJECT, self.DS_NAME) - RESOURCE = self._makeResource() - conn = _Connection(RESOURCE) - client = _Client(project=self.PROJECT, connection=conn) - dataset = _Dataset(client) - table = self._make_one(self.TABLE_NAME, dataset) - table.partitioning_type = 'DAY' - table.create() - - self.assertEqual(len(conn._requested), 1) - req = conn._requested[0] - self.assertEqual(req['method'], 'POST') - self.assertEqual(req['path'], '/%s' % PATH) - SENT = { - 'tableReference': { - 'projectId': self.PROJECT, - 
'datasetId': self.DS_NAME, - 'tableId': self.TABLE_NAME}, - 'timePartitioning': {'type': 'DAY'}, - } - self.assertEqual(req['data'], SENT) - self._verifyResourceProperties(table, RESOURCE) - - def test_create_w_bound_client(self): - from google.cloud.bigquery.table import SchemaField - - PATH = 'projects/%s/datasets/%s/tables' % (self.PROJECT, self.DS_NAME) - RESOURCE = self._makeResource() - conn = _Connection(RESOURCE) - client = _Client(project=self.PROJECT, connection=conn) - dataset = _Dataset(client) - full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') - age = SchemaField('age', 'INTEGER', mode='REQUIRED') - table = self._make_one(self.TABLE_NAME, dataset, - schema=[full_name, age]) - - table.create() - - self.assertEqual(len(conn._requested), 1) - req = conn._requested[0] - self.assertEqual(req['method'], 'POST') - self.assertEqual(req['path'], '/%s' % PATH) - SENT = { - 'tableReference': { - 'projectId': self.PROJECT, - 'datasetId': self.DS_NAME, - 'tableId': self.TABLE_NAME}, - 'schema': {'fields': [ - {'name': 'full_name', 'type': 'STRING', 'mode': 'REQUIRED'}, - {'name': 'age', 'type': 'INTEGER', 'mode': 'REQUIRED'}]}, - } - self.assertEqual(req['data'], SENT) - self._verifyResourceProperties(table, RESOURCE) - - def test_create_w_partition_no_expire(self): - from google.cloud.bigquery.table import SchemaField - - PATH = 'projects/%s/datasets/%s/tables' % (self.PROJECT, self.DS_NAME) - RESOURCE = self._makeResource() - conn = _Connection(RESOURCE) - client = _Client(project=self.PROJECT, connection=conn) - dataset = _Dataset(client) - full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') - age = SchemaField('age', 'INTEGER', mode='REQUIRED') - table = self._make_one(self.TABLE_NAME, dataset, - schema=[full_name, age]) - - self.assertIsNone(table.partitioning_type) - table.partitioning_type = "DAY" - self.assertEqual(table.partitioning_type, "DAY") - table.create() - - self.assertEqual(len(conn._requested), 1) - req = conn._requested[0] - self.assertEqual(req['method'], 'POST') - self.assertEqual(req['path'], '/%s' % PATH) - SENT = { - 'tableReference': { - 'projectId': self.PROJECT, - 'datasetId': self.DS_NAME, - 'tableId': self.TABLE_NAME}, - 'timePartitioning': {'type': 'DAY'}, - 'schema': {'fields': [ - {'name': 'full_name', 'type': 'STRING', 'mode': 'REQUIRED'}, - {'name': 'age', 'type': 'INTEGER', 'mode': 'REQUIRED'}]}, - } - self.assertEqual(req['data'], SENT) - self._verifyResourceProperties(table, RESOURCE) - - def test_create_w_partition_and_expire(self): - from google.cloud.bigquery.table import SchemaField + import datetime + from google.cloud._helpers import UTC + from google.cloud._helpers import _millis - PATH = 'projects/%s/datasets/%s/tables' % (self.PROJECT, self.DS_NAME) RESOURCE = self._makeResource() - conn = _Connection(RESOURCE) - client = _Client(project=self.PROJECT, connection=conn) - dataset = _Dataset(client) - full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') - age = SchemaField('age', 'INTEGER', mode='REQUIRED') - table = self._make_one(self.TABLE_NAME, dataset, - schema=[full_name, age]) - self.assertIsNone(table.partition_expiration) - table.partition_expiration = 100 - self.assertEqual(table.partitioning_type, "DAY") - self.assertEqual(table.partition_expiration, 100) - table.create() - - self.assertEqual(len(conn._requested), 1) - req = conn._requested[0] - self.assertEqual(req['method'], 'POST') - self.assertEqual(req['path'], '/%s' % PATH) - SENT = { - 'tableReference': { - 'projectId': self.PROJECT, - 
'datasetId': self.DS_NAME, - 'tableId': self.TABLE_NAME}, - 'timePartitioning': {'type': 'DAY', 'expirationMs': 100}, - 'schema': {'fields': [ - {'name': 'full_name', 'type': 'STRING', 'mode': 'REQUIRED'}, - {'name': 'age', 'type': 'INTEGER', 'mode': 'REQUIRED'}]}, - } - self.assertEqual(req['data'], SENT) + RESOURCE['view'] = {'query': 'select fullname, age from person_ages'} + RESOURCE['type'] = 'VIEW' + RESOURCE['location'] = 'EU' + self.EXP_TIME = datetime.datetime(2015, 8, 1, 23, 59, 59, tzinfo=UTC) + RESOURCE['expirationTime'] = _millis(self.EXP_TIME) + klass = self._get_target_class() + table = klass.from_api_repr(RESOURCE) self._verifyResourceProperties(table, RESOURCE) def test_partition_type_setter_bad_type(self): from google.cloud.bigquery.table import SchemaField - RESOURCE = self._makeResource() - conn = _Connection(RESOURCE) - client = _Client(project=self.PROJECT, connection=conn) - dataset = _Dataset(client) + dataset = DatasetReference(self.PROJECT, self.DS_ID) + table_ref = dataset.table(self.TABLE_NAME) full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') age = SchemaField('age', 'INTEGER', mode='REQUIRED') - table = self._make_one(self.TABLE_NAME, dataset, - schema=[full_name, age]) + table = self._make_one(table_ref, schema=[full_name, age]) with self.assertRaises(ValueError): table.partitioning_type = 123 def test_partition_type_setter_unknown_value(self): from google.cloud.bigquery.table import SchemaField - RESOURCE = self._makeResource() - conn = _Connection(RESOURCE) - client = _Client(project=self.PROJECT, connection=conn) - dataset = _Dataset(client) + dataset = DatasetReference(self.PROJECT, self.DS_ID) + table_ref = dataset.table(self.TABLE_NAME) full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') age = SchemaField('age', 'INTEGER', mode='REQUIRED') - table = self._make_one(self.TABLE_NAME, dataset, - schema=[full_name, age]) + table = self._make_one(table_ref, schema=[full_name, age]) with self.assertRaises(ValueError): table.partitioning_type = "HASH" def test_partition_type_setter_w_known_value(self): from google.cloud.bigquery.table import SchemaField - RESOURCE = self._makeResource() - conn = _Connection(RESOURCE) - client = _Client(project=self.PROJECT, connection=conn) - dataset = _Dataset(client) + dataset = DatasetReference(self.PROJECT, self.DS_ID) + table_ref = dataset.table(self.TABLE_NAME) full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') age = SchemaField('age', 'INTEGER', mode='REQUIRED') - table = self._make_one(self.TABLE_NAME, dataset, - schema=[full_name, age]) + table = self._make_one(table_ref, schema=[full_name, age]) self.assertIsNone(table.partitioning_type) table.partitioning_type = 'DAY' self.assertEqual(table.partitioning_type, 'DAY') @@ -595,14 +623,11 @@ def test_partition_type_setter_w_known_value(self): def test_partition_type_setter_w_none(self): from google.cloud.bigquery.table import SchemaField - RESOURCE = self._makeResource() - conn = _Connection(RESOURCE) - client = _Client(project=self.PROJECT, connection=conn) - dataset = _Dataset(client) + dataset = DatasetReference(self.PROJECT, self.DS_ID) + table_ref = dataset.table(self.TABLE_NAME) full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') age = SchemaField('age', 'INTEGER', mode='REQUIRED') - table = self._make_one(self.TABLE_NAME, dataset, - schema=[full_name, age]) + table = self._make_one(table_ref, schema=[full_name, age]) table._properties['timePartitioning'] = {'type': 'DAY'} table.partitioning_type = None 
self.assertIsNone(table.partitioning_type) @@ -611,28 +636,22 @@ def test_partition_type_setter_w_none(self): def test_partition_experation_bad_type(self): from google.cloud.bigquery.table import SchemaField - RESOURCE = self._makeResource() - conn = _Connection(RESOURCE) - client = _Client(project=self.PROJECT, connection=conn) - dataset = _Dataset(client) + dataset = DatasetReference(self.PROJECT, self.DS_ID) + table_ref = dataset.table(self.TABLE_NAME) full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') age = SchemaField('age', 'INTEGER', mode='REQUIRED') - table = self._make_one(self.TABLE_NAME, dataset, - schema=[full_name, age]) + table = self._make_one(table_ref, schema=[full_name, age]) with self.assertRaises(ValueError): table.partition_expiration = "NEVER" def test_partition_expiration_w_integer(self): from google.cloud.bigquery.table import SchemaField - RESOURCE = self._makeResource() - conn = _Connection(RESOURCE) - client = _Client(project=self.PROJECT, connection=conn) - dataset = _Dataset(client) + dataset = DatasetReference(self.PROJECT, self.DS_ID) + table_ref = dataset.table(self.TABLE_NAME) full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') age = SchemaField('age', 'INTEGER', mode='REQUIRED') - table = self._make_one(self.TABLE_NAME, dataset, - schema=[full_name, age]) + table = self._make_one(table_ref, schema=[full_name, age]) self.assertIsNone(table.partition_expiration) table.partition_expiration = 100 self.assertEqual(table.partitioning_type, "DAY") @@ -641,14 +660,11 @@ def test_partition_expiration_w_integer(self): def test_partition_expiration_w_none(self): from google.cloud.bigquery.table import SchemaField - RESOURCE = self._makeResource() - conn = _Connection(RESOURCE) - client = _Client(project=self.PROJECT, connection=conn) - dataset = _Dataset(client) + dataset = DatasetReference(self.PROJECT, self.DS_ID) + table_ref = dataset.table(self.TABLE_NAME) full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') age = SchemaField('age', 'INTEGER', mode='REQUIRED') - table = self._make_one(self.TABLE_NAME, dataset, - schema=[full_name, age]) + table = self._make_one(table_ref, schema=[full_name, age]) self.assertIsNone(table.partition_expiration) table._properties['timePartitioning'] = { 'type': 'DAY', @@ -661,1665 +677,77 @@ def test_partition_expiration_w_none(self): def test_partition_expiration_w_none_no_partition_set(self): from google.cloud.bigquery.table import SchemaField - RESOURCE = self._makeResource() - conn = _Connection(RESOURCE) - client = _Client(project=self.PROJECT, connection=conn) - dataset = _Dataset(client) + dataset = DatasetReference(self.PROJECT, self.DS_ID) + table_ref = dataset.table(self.TABLE_NAME) full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') age = SchemaField('age', 'INTEGER', mode='REQUIRED') - table = self._make_one(self.TABLE_NAME, dataset, - schema=[full_name, age]) + table = self._make_one(table_ref, schema=[full_name, age]) self.assertIsNone(table.partition_expiration) table.partition_expiration = None self.assertIsNone(table.partitioning_type) self.assertIsNone(table.partition_expiration) - def test_list_partitions(self): - from google.cloud.bigquery.table import SchemaField - - conn = _Connection() - client = _Client(project=self.PROJECT, connection=conn) - client._query_results = [(20160804, None), (20160805, None)] - dataset = _Dataset(client) - full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') - age = SchemaField('age', 'INTEGER', mode='REQUIRED') - table = 
self._make_one(self.TABLE_NAME, dataset, - schema=[full_name, age]) - self.assertEqual(table.list_partitions(), [20160804, 20160805]) - - def test_create_w_alternate_client(self): - import datetime - from google.cloud._helpers import UTC - from google.cloud._helpers import _millis - - PATH = 'projects/%s/datasets/%s/tables' % (self.PROJECT, self.DS_NAME) - DESCRIPTION = 'DESCRIPTION' - TITLE = 'TITLE' - QUERY = 'select fullname, age from person_ages' - RESOURCE = self._makeResource() - RESOURCE['description'] = DESCRIPTION - RESOURCE['friendlyName'] = TITLE - self.EXP_TIME = datetime.datetime(2015, 8, 1, 23, 59, 59, - tzinfo=UTC) - RESOURCE['expirationTime'] = _millis(self.EXP_TIME) - RESOURCE['view'] = {} - RESOURCE['view']['query'] = QUERY - RESOURCE['type'] = 'VIEW' - conn1 = _Connection() - client1 = _Client(project=self.PROJECT, connection=conn1) - conn2 = _Connection(RESOURCE) - client2 = _Client(project=self.PROJECT, connection=conn2) - dataset = _Dataset(client=client1) - table = self._make_one(self.TABLE_NAME, dataset=dataset) - table.friendly_name = TITLE - table.description = DESCRIPTION - table.view_query = QUERY - - table.create(client=client2) - - self.assertEqual(len(conn1._requested), 0) - self.assertEqual(len(conn2._requested), 1) - req = conn2._requested[0] - self.assertEqual(req['method'], 'POST') - self.assertEqual(req['path'], '/%s' % PATH) - SENT = { - 'tableReference': { - 'projectId': self.PROJECT, - 'datasetId': self.DS_NAME, - 'tableId': self.TABLE_NAME}, - 'description': DESCRIPTION, - 'friendlyName': TITLE, - 'view': {'query': QUERY}, - } - self.assertEqual(req['data'], SENT) - self._verifyResourceProperties(table, RESOURCE) - def test_create_w_missing_output_properties(self): - # In the wild, the resource returned from 'dataset.create' sometimes - # lacks 'creationTime' / 'lastModifiedTime' - from google.cloud.bigquery.table import SchemaField +class Test_row_from_mapping(unittest.TestCase, _SchemaBase): - PATH = 'projects/%s/datasets/%s/tables' % (self.PROJECT, self.DS_NAME) - RESOURCE = self._makeResource() - del RESOURCE['creationTime'] - del RESOURCE['lastModifiedTime'] - self.WHEN = None - conn = _Connection(RESOURCE) - client = _Client(project=self.PROJECT, connection=conn) - dataset = _Dataset(client) - full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') - age = SchemaField('age', 'INTEGER', mode='REQUIRED') - table = self._make_one(self.TABLE_NAME, dataset, - schema=[full_name, age]) - - table.create() - - self.assertEqual(len(conn._requested), 1) - req = conn._requested[0] - self.assertEqual(req['method'], 'POST') - self.assertEqual(req['path'], '/%s' % PATH) - SENT = { - 'tableReference': { - 'projectId': self.PROJECT, - 'datasetId': self.DS_NAME, - 'tableId': self.TABLE_NAME}, - 'schema': {'fields': [ - {'name': 'full_name', 'type': 'STRING', 'mode': 'REQUIRED'}, - {'name': 'age', 'type': 'INTEGER', 'mode': 'REQUIRED'}]}, - } - self.assertEqual(req['data'], SENT) - self._verifyResourceProperties(table, RESOURCE) - - def test_exists_miss_w_bound_client(self): - PATH = 'projects/%s/datasets/%s/tables/%s' % ( - self.PROJECT, self.DS_NAME, self.TABLE_NAME) - conn = _Connection() - client = _Client(project=self.PROJECT, connection=conn) - dataset = _Dataset(client) - table = self._make_one(self.TABLE_NAME, dataset=dataset) - - self.assertFalse(table.exists()) - - self.assertEqual(len(conn._requested), 1) - req = conn._requested[0] - self.assertEqual(req['method'], 'GET') - self.assertEqual(req['path'], '/%s' % PATH) - 
self.assertEqual(req['query_params'], {'fields': 'id'}) - - def test_exists_hit_w_alternate_client(self): - PATH = 'projects/%s/datasets/%s/tables/%s' % ( - self.PROJECT, self.DS_NAME, self.TABLE_NAME) - conn1 = _Connection() - client1 = _Client(project=self.PROJECT, connection=conn1) - conn2 = _Connection({}) - client2 = _Client(project=self.PROJECT, connection=conn2) - dataset = _Dataset(client1) - table = self._make_one(self.TABLE_NAME, dataset=dataset) - - self.assertTrue(table.exists(client=client2)) - - self.assertEqual(len(conn1._requested), 0) - self.assertEqual(len(conn2._requested), 1) - req = conn2._requested[0] - self.assertEqual(req['method'], 'GET') - self.assertEqual(req['path'], '/%s' % PATH) - self.assertEqual(req['query_params'], {'fields': 'id'}) - - def test_reload_w_bound_client(self): - PATH = 'projects/%s/datasets/%s/tables/%s' % ( - self.PROJECT, self.DS_NAME, self.TABLE_NAME) - RESOURCE = self._makeResource() - conn = _Connection(RESOURCE) - client = _Client(project=self.PROJECT, connection=conn) - dataset = _Dataset(client) - table = self._make_one(self.TABLE_NAME, dataset=dataset) - - table.reload() - - self.assertEqual(len(conn._requested), 1) - req = conn._requested[0] - self.assertEqual(req['method'], 'GET') - self.assertEqual(req['path'], '/%s' % PATH) - self._verifyResourceProperties(table, RESOURCE) - - def test_reload_w_alternate_client(self): - PATH = 'projects/%s/datasets/%s/tables/%s' % ( - self.PROJECT, self.DS_NAME, self.TABLE_NAME) - RESOURCE = self._makeResource() - conn1 = _Connection() - client1 = _Client(project=self.PROJECT, connection=conn1) - conn2 = _Connection(RESOURCE) - client2 = _Client(project=self.PROJECT, connection=conn2) - dataset = _Dataset(client1) - table = self._make_one(self.TABLE_NAME, dataset=dataset) - - table.reload(client=client2) - - self.assertEqual(len(conn1._requested), 0) - self.assertEqual(len(conn2._requested), 1) - req = conn2._requested[0] - self.assertEqual(req['method'], 'GET') - self.assertEqual(req['path'], '/%s' % PATH) - self._verifyResourceProperties(table, RESOURCE) - - def test_patch_w_invalid_expiration(self): - RESOURCE = self._makeResource() - conn = _Connection(RESOURCE) - client = _Client(project=self.PROJECT, connection=conn) - dataset = _Dataset(client) - table = self._make_one(self.TABLE_NAME, dataset=dataset) - - with self.assertRaises(ValueError): - table.patch(expires='BOGUS') - - def test_patch_w_bound_client(self): - PATH = 'projects/%s/datasets/%s/tables/%s' % ( - self.PROJECT, self.DS_NAME, self.TABLE_NAME) - DESCRIPTION = 'DESCRIPTION' - TITLE = 'TITLE' - RESOURCE = self._makeResource() - RESOURCE['description'] = DESCRIPTION - RESOURCE['friendlyName'] = TITLE - conn = _Connection(RESOURCE) - client = _Client(project=self.PROJECT, connection=conn) - dataset = _Dataset(client) - table = self._make_one(self.TABLE_NAME, dataset=dataset) - - table.patch(description=DESCRIPTION, - friendly_name=TITLE, - view_query=None) - - self.assertEqual(len(conn._requested), 1) - req = conn._requested[0] - self.assertEqual(req['method'], 'PATCH') - SENT = { - 'description': DESCRIPTION, - 'friendlyName': TITLE, - 'view': None, - } - self.assertEqual(req['data'], SENT) - self.assertEqual(req['path'], '/%s' % PATH) - self._verifyResourceProperties(table, RESOURCE) - - def test_patch_w_alternate_client(self): - import datetime - from google.cloud._helpers import UTC - from google.cloud._helpers import _millis - from google.cloud.bigquery.table import SchemaField - - PATH = 'projects/%s/datasets/%s/tables/%s' % 
( - self.PROJECT, self.DS_NAME, self.TABLE_NAME) - QUERY = 'select fullname, age from person_ages' - LOCATION = 'EU' - RESOURCE = self._makeResource() - RESOURCE['view'] = {'query': QUERY} - RESOURCE['type'] = 'VIEW' - RESOURCE['location'] = LOCATION - self.EXP_TIME = datetime.datetime(2015, 8, 1, 23, 59, 59, - tzinfo=UTC) - RESOURCE['expirationTime'] = _millis(self.EXP_TIME) - conn1 = _Connection() - client1 = _Client(project=self.PROJECT, connection=conn1) - conn2 = _Connection(RESOURCE) - client2 = _Client(project=self.PROJECT, connection=conn2) - dataset = _Dataset(client1) - table = self._make_one(self.TABLE_NAME, dataset=dataset) - full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') - age = SchemaField('age', 'INTEGER', mode='NULLABLE') - - table.patch(client=client2, view_query=QUERY, location=LOCATION, - expires=self.EXP_TIME, schema=[full_name, age]) - - self.assertEqual(len(conn1._requested), 0) - self.assertEqual(len(conn2._requested), 1) - req = conn2._requested[0] - self.assertEqual(req['method'], 'PATCH') - self.assertEqual(req['path'], '/%s' % PATH) - SENT = { - 'view': {'query': QUERY}, - 'location': LOCATION, - 'expirationTime': _millis(self.EXP_TIME), - 'schema': {'fields': [ - {'name': 'full_name', 'type': 'STRING', 'mode': 'REQUIRED'}, - {'name': 'age', 'type': 'INTEGER', 'mode': 'NULLABLE'}]}, - } - self.assertEqual(req['data'], SENT) - self._verifyResourceProperties(table, RESOURCE) - - def test_patch_w_schema_None(self): - # Simulate deleting schema: not sure if back-end will actually - # allow this operation, but the spec says it is optional. - PATH = 'projects/%s/datasets/%s/tables/%s' % ( - self.PROJECT, self.DS_NAME, self.TABLE_NAME) - DESCRIPTION = 'DESCRIPTION' - TITLE = 'TITLE' - RESOURCE = self._makeResource() - RESOURCE['description'] = DESCRIPTION - RESOURCE['friendlyName'] = TITLE - conn = _Connection(RESOURCE) - client = _Client(project=self.PROJECT, connection=conn) - dataset = _Dataset(client) - table = self._make_one(self.TABLE_NAME, dataset=dataset) - - table.patch(schema=None) - - self.assertEqual(len(conn._requested), 1) - req = conn._requested[0] - self.assertEqual(req['method'], 'PATCH') - SENT = {'schema': None} - self.assertEqual(req['data'], SENT) - self.assertEqual(req['path'], '/%s' % PATH) - self._verifyResourceProperties(table, RESOURCE) - - def test_update_w_bound_client(self): - from google.cloud.bigquery.table import SchemaField - - PATH = 'projects/%s/datasets/%s/tables/%s' % ( - self.PROJECT, self.DS_NAME, self.TABLE_NAME) - DESCRIPTION = 'DESCRIPTION' - TITLE = 'TITLE' - RESOURCE = self._makeResource() - RESOURCE['description'] = DESCRIPTION - RESOURCE['friendlyName'] = TITLE - conn = _Connection(RESOURCE) - client = _Client(project=self.PROJECT, connection=conn) - dataset = _Dataset(client) - full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') - age = SchemaField('age', 'INTEGER', mode='REQUIRED') - table = self._make_one(self.TABLE_NAME, dataset=dataset, - schema=[full_name, age]) - table.description = DESCRIPTION - table.friendly_name = TITLE - - table.update() - - self.assertEqual(len(conn._requested), 1) - req = conn._requested[0] - self.assertEqual(req['method'], 'PUT') - SENT = { - 'tableReference': - {'projectId': self.PROJECT, - 'datasetId': self.DS_NAME, - 'tableId': self.TABLE_NAME}, - 'schema': {'fields': [ - {'name': 'full_name', 'type': 'STRING', 'mode': 'REQUIRED'}, - {'name': 'age', 'type': 'INTEGER', 'mode': 'REQUIRED'}]}, - 'description': DESCRIPTION, - 'friendlyName': TITLE, - } - 
self.assertEqual(req['data'], SENT) - self.assertEqual(req['path'], '/%s' % PATH) - self._verifyResourceProperties(table, RESOURCE) - - def test_update_w_alternate_client(self): - import datetime - from google.cloud._helpers import UTC - from google.cloud._helpers import _millis - - PATH = 'projects/%s/datasets/%s/tables/%s' % ( - self.PROJECT, self.DS_NAME, self.TABLE_NAME) - DEF_TABLE_EXP = 12345 - LOCATION = 'EU' - QUERY = 'select fullname, age from person_ages' - RESOURCE = self._makeResource() - RESOURCE['defaultTableExpirationMs'] = 12345 - RESOURCE['location'] = LOCATION - self.EXP_TIME = datetime.datetime(2015, 8, 1, 23, 59, 59, - tzinfo=UTC) - RESOURCE['expirationTime'] = _millis(self.EXP_TIME) - RESOURCE['view'] = {'query': QUERY, 'useLegacySql': True} - RESOURCE['type'] = 'VIEW' - conn1 = _Connection() - client1 = _Client(project=self.PROJECT, connection=conn1) - conn2 = _Connection(RESOURCE) - client2 = _Client(project=self.PROJECT, connection=conn2) - dataset = _Dataset(client1) - table = self._make_one(self.TABLE_NAME, dataset=dataset) - table.default_table_expiration_ms = DEF_TABLE_EXP - table.location = LOCATION - table.expires = self.EXP_TIME - table.view_query = QUERY - table.view_use_legacy_sql = True - - table.update(client=client2) - - self.assertEqual(len(conn1._requested), 0) - self.assertEqual(len(conn2._requested), 1) - req = conn2._requested[0] - self.assertEqual(req['method'], 'PUT') - self.assertEqual(req['path'], '/%s' % PATH) - SENT = { - 'tableReference': - {'projectId': self.PROJECT, - 'datasetId': self.DS_NAME, - 'tableId': self.TABLE_NAME}, - 'expirationTime': _millis(self.EXP_TIME), - 'location': 'EU', - 'view': {'query': QUERY, 'useLegacySql': True}, - } - self.assertEqual(req['data'], SENT) - self._verifyResourceProperties(table, RESOURCE) - - def test_delete_w_bound_client(self): - PATH = 'projects/%s/datasets/%s/tables/%s' % ( - self.PROJECT, self.DS_NAME, self.TABLE_NAME) - conn = _Connection({}) - client = _Client(project=self.PROJECT, connection=conn) - dataset = _Dataset(client) - table = self._make_one(self.TABLE_NAME, dataset=dataset) - - table.delete() - - self.assertEqual(len(conn._requested), 1) - req = conn._requested[0] - self.assertEqual(req['method'], 'DELETE') - self.assertEqual(req['path'], '/%s' % PATH) - - def test_delete_w_alternate_client(self): - PATH = 'projects/%s/datasets/%s/tables/%s' % ( - self.PROJECT, self.DS_NAME, self.TABLE_NAME) - conn1 = _Connection() - client1 = _Client(project=self.PROJECT, connection=conn1) - conn2 = _Connection({}) - client2 = _Client(project=self.PROJECT, connection=conn2) - dataset = _Dataset(client1) - table = self._make_one(self.TABLE_NAME, dataset=dataset) - - table.delete(client=client2) - - self.assertEqual(len(conn1._requested), 0) - self.assertEqual(len(conn2._requested), 1) - req = conn2._requested[0] - self.assertEqual(req['method'], 'DELETE') - self.assertEqual(req['path'], '/%s' % PATH) - - def test_fetch_data_wo_schema(self): - from google.cloud.bigquery.table import _TABLE_HAS_NO_SCHEMA - - client = _Client(project=self.PROJECT) - dataset = _Dataset(client) - table = self._make_one(self.TABLE_NAME, dataset=dataset) - - with self.assertRaises(ValueError) as exc: - table.fetch_data() - - self.assertEqual(exc.exception.args, (_TABLE_HAS_NO_SCHEMA,)) - - def test_fetch_data_w_bound_client(self): - import datetime - import six - from google.cloud._helpers import UTC - from google.cloud.bigquery.table import SchemaField + PROJECT = 'prahj-ekt' + DS_ID = 'dataset-name' + TABLE_NAME = 
'table-name' - PATH = 'projects/%s/datasets/%s/tables/%s/data' % ( - self.PROJECT, self.DS_NAME, self.TABLE_NAME) - WHEN_TS = 1437767599.006 - WHEN = datetime.datetime.utcfromtimestamp(WHEN_TS).replace( - tzinfo=UTC) - WHEN_1 = WHEN + datetime.timedelta(seconds=1) - WHEN_2 = WHEN + datetime.timedelta(seconds=2) - ROWS = 1234 - TOKEN = 'TOKEN' - - def _bigquery_timestamp_float_repr(ts_float): - # Preserve microsecond precision for E+09 timestamps - return '%0.15E' % (ts_float,) - - DATA = { - 'totalRows': str(ROWS), - 'pageToken': TOKEN, - 'rows': [ - {'f': [ - {'v': 'Phred Phlyntstone'}, - {'v': '32'}, - {'v': _bigquery_timestamp_float_repr(WHEN_TS)}, - ]}, - {'f': [ - {'v': 'Bharney Rhubble'}, - {'v': '33'}, - {'v': _bigquery_timestamp_float_repr(WHEN_TS + 1)}, - ]}, - {'f': [ - {'v': 'Wylma Phlyntstone'}, - {'v': '29'}, - {'v': _bigquery_timestamp_float_repr(WHEN_TS + 2)}, - ]}, - {'f': [ - {'v': 'Bhettye Rhubble'}, - {'v': None}, - {'v': None}, - ]}, - ] - } + def _call_fut(self, mapping, schema): + from google.cloud.bigquery.table import _row_from_mapping - conn = _Connection(DATA) - client = _Client(project=self.PROJECT, connection=conn) - dataset = _Dataset(client) - full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') - age = SchemaField('age', 'INTEGER', mode='NULLABLE') - joined = SchemaField('joined', 'TIMESTAMP', mode='NULLABLE') - table = self._make_one(self.TABLE_NAME, dataset=dataset, - schema=[full_name, age, joined]) - - iterator = table.fetch_data() - page = six.next(iterator.pages) - rows = list(page) - total_rows = iterator.total_rows - page_token = iterator.next_page_token - - self.assertEqual(len(rows), 4) - self.assertEqual(rows[0], ('Phred Phlyntstone', 32, WHEN)) - self.assertEqual(rows[1], ('Bharney Rhubble', 33, WHEN_1)) - self.assertEqual(rows[2], ('Wylma Phlyntstone', 29, WHEN_2)) - self.assertEqual(rows[3], ('Bhettye Rhubble', None, None)) - self.assertEqual(total_rows, ROWS) - self.assertEqual(page_token, TOKEN) - - self.assertEqual(len(conn._requested), 1) - req = conn._requested[0] - self.assertEqual(req['method'], 'GET') - self.assertEqual(req['path'], '/%s' % PATH) - - def test_fetch_data_w_alternate_client(self): - import six - from google.cloud.bigquery.table import SchemaField + return _row_from_mapping(mapping, schema) - PATH = 'projects/%s/datasets/%s/tables/%s/data' % ( - self.PROJECT, self.DS_NAME, self.TABLE_NAME) - MAX = 10 - TOKEN = 'TOKEN' - DATA = { - 'rows': [ - {'f': [ - {'v': 'Phred Phlyntstone'}, - {'v': '32'}, - {'v': 'true'}, - {'v': '3.1415926'}, - ]}, - {'f': [ - {'v': 'Bharney Rhubble'}, - {'v': '33'}, - {'v': 'false'}, - {'v': '1.414'}, - ]}, - {'f': [ - {'v': 'Wylma Phlyntstone'}, - {'v': '29'}, - {'v': 'true'}, - {'v': '2.71828'}, - ]}, - {'f': [ - {'v': 'Bhettye Rhubble'}, - {'v': '27'}, - {'v': None}, - {'v': None}, - ]}, - ] - } - conn1 = _Connection() - client1 = _Client(project=self.PROJECT, connection=conn1) - conn2 = _Connection(DATA) - client2 = _Client(project=self.PROJECT, connection=conn2) - dataset = _Dataset(client1) - full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') - age = SchemaField('age', 'INTEGER', mode='REQUIRED') - voter = SchemaField('voter', 'BOOLEAN', mode='NULLABLE') - score = SchemaField('score', 'FLOAT', mode='NULLABLE') - table = self._make_one(self.TABLE_NAME, dataset=dataset, - schema=[full_name, age, voter, score]) - - iterator = table.fetch_data( - client=client2, max_results=MAX, page_token=TOKEN) - page = six.next(iterator.pages) - rows = list(page) - total_rows = 
iterator.total_rows - page_token = iterator.next_page_token - - self.assertEqual(len(rows), 4) - self.assertEqual(rows[0], ('Phred Phlyntstone', 32, True, 3.1415926)) - self.assertEqual(rows[1], ('Bharney Rhubble', 33, False, 1.414)) - self.assertEqual(rows[2], ('Wylma Phlyntstone', 29, True, 2.71828)) - self.assertEqual(rows[3], ('Bhettye Rhubble', 27, None, None)) - self.assertIsNone(total_rows) - self.assertIsNone(page_token) - - self.assertEqual(len(conn1._requested), 0) - self.assertEqual(len(conn2._requested), 1) - req = conn2._requested[0] - self.assertEqual(req['method'], 'GET') - self.assertEqual(req['path'], '/%s' % PATH) - self.assertEqual(req['query_params'], - {'maxResults': MAX, 'pageToken': TOKEN}) - - def test_fetch_data_w_repeated_fields(self): - import six - from google.cloud.bigquery.table import SchemaField - - PATH = 'projects/%s/datasets/%s/tables/%s/data' % ( - self.PROJECT, self.DS_NAME, self.TABLE_NAME) - ROWS = 1234 - TOKEN = 'TOKEN' - DATA = { - 'totalRows': ROWS, - 'pageToken': TOKEN, - 'rows': [ - {'f': [ - {'v': [{'v': 'red'}, {'v': 'green'}]}, - {'v': [{ - 'v': { - 'f': [ - {'v': [{'v': '1'}, {'v': '2'}]}, - {'v': [{'v': '3.1415'}, {'v': '1.414'}]}, - ]} - }]}, - ]}, - ] - } - conn = _Connection(DATA) - client = _Client(project=self.PROJECT, connection=conn) - dataset = _Dataset(client) - color = SchemaField('color', 'STRING', mode='REPEATED') - index = SchemaField('index', 'INTEGER', 'REPEATED') - score = SchemaField('score', 'FLOAT', 'REPEATED') - struct = SchemaField('struct', 'RECORD', mode='REPEATED', - fields=[index, score]) - table = self._make_one(self.TABLE_NAME, dataset=dataset, - schema=[color, struct]) - - iterator = table.fetch_data() - page = six.next(iterator.pages) - rows = list(page) - total_rows = iterator.total_rows - page_token = iterator.next_page_token - - self.assertEqual(len(rows), 1) - self.assertEqual(rows[0][0], ['red', 'green']) - self.assertEqual(rows[0][1], [{'index': [1, 2], - 'score': [3.1415, 1.414]}]) - self.assertEqual(total_rows, ROWS) - self.assertEqual(page_token, TOKEN) - - self.assertEqual(len(conn._requested), 1) - req = conn._requested[0] - self.assertEqual(req['method'], 'GET') - self.assertEqual(req['path'], '/%s' % PATH) - - def test_fetch_data_w_record_schema(self): - import six - from google.cloud.bigquery.table import SchemaField - - PATH = 'projects/%s/datasets/%s/tables/%s/data' % ( - self.PROJECT, self.DS_NAME, self.TABLE_NAME) - ROWS = 1234 - TOKEN = 'TOKEN' - DATA = { - 'totalRows': ROWS, - 'pageToken': TOKEN, - 'rows': [ - {'f': [ - {'v': 'Phred Phlyntstone'}, - {'v': {'f': [{'v': '800'}, {'v': '555-1212'}, {'v': 1}]}}, - ]}, - {'f': [ - {'v': 'Bharney Rhubble'}, - {'v': {'f': [{'v': '877'}, {'v': '768-5309'}, {'v': 2}]}}, - ]}, - {'f': [ - {'v': 'Wylma Phlyntstone'}, - {'v': None}, - ]}, - ] - } - conn = _Connection(DATA) - client = _Client(project=self.PROJECT, connection=conn) - dataset = _Dataset(client) - full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') - area_code = SchemaField('area_code', 'STRING', 'REQUIRED') - local_number = SchemaField('local_number', 'STRING', 'REQUIRED') - rank = SchemaField('rank', 'INTEGER', 'REQUIRED') - phone = SchemaField('phone', 'RECORD', mode='NULLABLE', - fields=[area_code, local_number, rank]) - table = self._make_one(self.TABLE_NAME, dataset=dataset, - schema=[full_name, phone]) - - iterator = table.fetch_data() - page = six.next(iterator.pages) - rows = list(page) - total_rows = iterator.total_rows - page_token = iterator.next_page_token - - 
self.assertEqual(len(rows), 3) - self.assertEqual(rows[0][0], 'Phred Phlyntstone') - self.assertEqual(rows[0][1], {'area_code': '800', - 'local_number': '555-1212', - 'rank': 1}) - self.assertEqual(rows[1][0], 'Bharney Rhubble') - self.assertEqual(rows[1][1], {'area_code': '877', - 'local_number': '768-5309', - 'rank': 2}) - self.assertEqual(rows[2][0], 'Wylma Phlyntstone') - self.assertIsNone(rows[2][1]) - self.assertEqual(total_rows, ROWS) - self.assertEqual(page_token, TOKEN) - - self.assertEqual(len(conn._requested), 1) - req = conn._requested[0] - self.assertEqual(req['method'], 'GET') - self.assertEqual(req['path'], '/%s' % PATH) - - def test_row_from_mapping_wo_schema(self): - from google.cloud.bigquery.table import _TABLE_HAS_NO_SCHEMA + def test__row_from_mapping_wo_schema(self): + from google.cloud.bigquery.table import Table, _TABLE_HAS_NO_SCHEMA MAPPING = {'full_name': 'Phred Phlyntstone', 'age': 32} - client = _Client(project=self.PROJECT) - dataset = _Dataset(client) - table = self._make_one(self.TABLE_NAME, dataset=dataset) + dataset = DatasetReference(self.PROJECT, self.DS_ID) + table_ref = dataset.table(self.TABLE_NAME) + table = Table(table_ref) with self.assertRaises(ValueError) as exc: - table.row_from_mapping(MAPPING) + self._call_fut(MAPPING, table.schema) self.assertEqual(exc.exception.args, (_TABLE_HAS_NO_SCHEMA,)) - def test_row_from_mapping_w_invalid_schema(self): - from google.cloud.bigquery.table import SchemaField + def test__row_from_mapping_w_invalid_schema(self): + from google.cloud.bigquery.table import Table, SchemaField MAPPING = { 'full_name': 'Phred Phlyntstone', 'age': 32, 'colors': ['red', 'green'], 'bogus': 'WHATEVER', } - client = _Client(project=self.PROJECT) - dataset = _Dataset(client) + dataset = DatasetReference(self.PROJECT, self.DS_ID) + table_ref = dataset.table(self.TABLE_NAME) full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') age = SchemaField('age', 'INTEGER', mode='REQUIRED') colors = SchemaField('colors', 'DATETIME', mode='REPEATED') bogus = SchemaField('joined', 'STRING', mode='BOGUS') - table = self._make_one(self.TABLE_NAME, dataset=dataset, - schema=[full_name, age, colors, bogus]) + table = Table(table_ref, schema=[full_name, age, colors, bogus]) with self.assertRaises(ValueError) as exc: - table.row_from_mapping(MAPPING) + self._call_fut(MAPPING, table.schema) self.assertIn('Unknown field mode: BOGUS', str(exc.exception)) - def test_row_from_mapping_w_schema(self): - from google.cloud.bigquery.table import SchemaField + def test__row_from_mapping_w_schema(self): + from google.cloud.bigquery.table import Table, SchemaField MAPPING = { 'full_name': 'Phred Phlyntstone', 'age': 32, 'colors': ['red', 'green'], 'extra': 'IGNORED', } - client = _Client(project=self.PROJECT) - dataset = _Dataset(client) + dataset = DatasetReference(self.PROJECT, self.DS_ID) + table_ref = dataset.table(self.TABLE_NAME) full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') age = SchemaField('age', 'INTEGER', mode='REQUIRED') colors = SchemaField('colors', 'DATETIME', mode='REPEATED') joined = SchemaField('joined', 'STRING', mode='NULLABLE') - table = self._make_one(self.TABLE_NAME, dataset=dataset, - schema=[full_name, age, colors, joined]) + table = Table(table_ref, schema=[full_name, age, colors, joined]) self.assertEqual( - table.row_from_mapping(MAPPING), + self._call_fut(MAPPING, table.schema), ('Phred Phlyntstone', 32, ['red', 'green'], None)) - - def test_insert_data_wo_schema(self): - from google.cloud.bigquery.table import 
_TABLE_HAS_NO_SCHEMA - - client = _Client(project=self.PROJECT) - dataset = _Dataset(client) - table = self._make_one(self.TABLE_NAME, dataset=dataset) - ROWS = [ - ('Phred Phlyntstone', 32), - ('Bharney Rhubble', 33), - ('Wylma Phlyntstone', 29), - ('Bhettye Rhubble', 27), - ] - - with self.assertRaises(ValueError) as exc: - table.insert_data(ROWS) - - self.assertEqual(exc.exception.args, (_TABLE_HAS_NO_SCHEMA,)) - - def test_insert_data_w_bound_client(self): - import datetime - from google.cloud._helpers import UTC - from google.cloud._helpers import _datetime_to_rfc3339 - from google.cloud._helpers import _microseconds_from_datetime - from google.cloud.bigquery.table import SchemaField - - WHEN_TS = 1437767599.006 - WHEN = datetime.datetime.utcfromtimestamp(WHEN_TS).replace( - tzinfo=UTC) - PATH = 'projects/%s/datasets/%s/tables/%s/insertAll' % ( - self.PROJECT, self.DS_NAME, self.TABLE_NAME) - conn = _Connection({}) - client = _Client(project=self.PROJECT, connection=conn) - dataset = _Dataset(client) - full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') - age = SchemaField('age', 'INTEGER', mode='REQUIRED') - joined = SchemaField('joined', 'TIMESTAMP', mode='NULLABLE') - table = self._make_one(self.TABLE_NAME, dataset=dataset, - schema=[full_name, age, joined]) - ROWS = [ - ('Phred Phlyntstone', 32, _datetime_to_rfc3339(WHEN)), - ('Bharney Rhubble', 33, WHEN + datetime.timedelta(seconds=1)), - ('Wylma Phlyntstone', 29, WHEN + datetime.timedelta(seconds=2)), - ('Bhettye Rhubble', 27, None), - ] - - def _row_data(row): - joined = row[2] - if isinstance(row[2], datetime.datetime): - joined = _microseconds_from_datetime(joined) * 1e-6 - return {'full_name': row[0], - 'age': str(row[1]), - 'joined': joined} - - SENT = { - 'rows': [{'json': _row_data(row)} for row in ROWS], - } - - errors = table.insert_data(ROWS) - - self.assertEqual(len(errors), 0) - self.assertEqual(len(conn._requested), 1) - req = conn._requested[0] - self.assertEqual(req['method'], 'POST') - self.assertEqual(req['path'], '/%s' % PATH) - self.assertEqual(req['data'], SENT) - - def test_insert_data_w_alternate_client(self): - from google.cloud.bigquery.table import SchemaField - - PATH = 'projects/%s/datasets/%s/tables/%s/insertAll' % ( - self.PROJECT, self.DS_NAME, self.TABLE_NAME) - RESPONSE = { - 'insertErrors': [ - {'index': 1, - 'errors': [ - {'reason': 'REASON', - 'location': 'LOCATION', - 'debugInfo': 'INFO', - 'message': 'MESSAGE'} - ]}, - ]} - conn1 = _Connection() - client1 = _Client(project=self.PROJECT, connection=conn1) - conn2 = _Connection(RESPONSE) - client2 = _Client(project=self.PROJECT, connection=conn2) - dataset = _Dataset(client1) - full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') - age = SchemaField('age', 'INTEGER', mode='REQUIRED') - voter = SchemaField('voter', 'BOOLEAN', mode='NULLABLE') - table = self._make_one(self.TABLE_NAME, dataset=dataset, - schema=[full_name, age, voter]) - ROWS = [ - ('Phred Phlyntstone', 32, True), - ('Bharney Rhubble', 33, False), - ('Wylma Phlyntstone', 29, True), - ('Bhettye Rhubble', 27, True), - ] - - def _row_data(row): - return { - 'full_name': row[0], - 'age': str(row[1]), - 'voter': row[2] and 'true' or 'false', - } - - SENT = { - 'skipInvalidRows': True, - 'ignoreUnknownValues': True, - 'templateSuffix': '20160303', - 'rows': [{'insertId': index, 'json': _row_data(row)} - for index, row in enumerate(ROWS)], - } - - errors = table.insert_data( - client=client2, - rows=ROWS, - row_ids=[index for index, _ in enumerate(ROWS)], - 
skip_invalid_rows=True, - ignore_unknown_values=True, - template_suffix='20160303', - ) - - self.assertEqual(len(errors), 1) - self.assertEqual(errors[0]['index'], 1) - self.assertEqual(len(errors[0]['errors']), 1) - self.assertEqual(errors[0]['errors'][0], - RESPONSE['insertErrors'][0]['errors'][0]) - - self.assertEqual(len(conn1._requested), 0) - self.assertEqual(len(conn2._requested), 1) - req = conn2._requested[0] - self.assertEqual(req['method'], 'POST') - self.assertEqual(req['path'], '/%s' % PATH) - self.assertEqual(req['data'], SENT) - - def test_insert_data_w_repeated_fields(self): - from google.cloud.bigquery.table import SchemaField - - PATH = 'projects/%s/datasets/%s/tables/%s/insertAll' % ( - self.PROJECT, self.DS_NAME, self.TABLE_NAME) - conn = _Connection({}) - client = _Client(project=self.PROJECT, connection=conn) - dataset = _Dataset(client) - full_name = SchemaField('color', 'STRING', mode='REPEATED') - index = SchemaField('index', 'INTEGER', 'REPEATED') - score = SchemaField('score', 'FLOAT', 'REPEATED') - struct = SchemaField('struct', 'RECORD', mode='REPEATED', - fields=[index, score]) - table = self._make_one(self.TABLE_NAME, dataset=dataset, - schema=[full_name, struct]) - ROWS = [ - (['red', 'green'], [{'index': [1, 2], 'score': [3.1415, 1.414]}]), - ] - - def _row_data(row): - return {'color': row[0], - 'struct': row[1]} - - SENT = { - 'rows': [{'json': _row_data(row)} for row in ROWS], - } - - errors = table.insert_data(ROWS) - - self.assertEqual(len(errors), 0) - self.assertEqual(len(conn._requested), 1) - req = conn._requested[0] - self.assertEqual(req['method'], 'POST') - self.assertEqual(req['path'], '/%s' % PATH) - self.assertEqual(req['data'], SENT) - - def test_insert_data_w_record_schema(self): - from google.cloud.bigquery.table import SchemaField - - PATH = 'projects/%s/datasets/%s/tables/%s/insertAll' % ( - self.PROJECT, self.DS_NAME, self.TABLE_NAME) - conn = _Connection({}) - client = _Client(project=self.PROJECT, connection=conn) - dataset = _Dataset(client) - full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') - area_code = SchemaField('area_code', 'STRING', 'REQUIRED') - local_number = SchemaField('local_number', 'STRING', 'REQUIRED') - rank = SchemaField('rank', 'INTEGER', 'REQUIRED') - phone = SchemaField('phone', 'RECORD', mode='NULLABLE', - fields=[area_code, local_number, rank]) - table = self._make_one(self.TABLE_NAME, dataset=dataset, - schema=[full_name, phone]) - ROWS = [ - ('Phred Phlyntstone', {'area_code': '800', - 'local_number': '555-1212', - 'rank': 1}), - ('Bharney Rhubble', {'area_code': '877', - 'local_number': '768-5309', - 'rank': 2}), - ('Wylma Phlyntstone', None), - ] - - def _row_data(row): - return {'full_name': row[0], - 'phone': row[1]} - - SENT = { - 'rows': [{'json': _row_data(row)} for row in ROWS], - } - - errors = table.insert_data(ROWS) - - self.assertEqual(len(errors), 0) - self.assertEqual(len(conn._requested), 1) - req = conn._requested[0] - self.assertEqual(req['method'], 'POST') - self.assertEqual(req['path'], '/%s' % PATH) - self.assertEqual(req['data'], SENT) - - def test__get_transport(self): - client = mock.Mock(spec=[u'_credentials', '_http']) - client._http = mock.sentinel.http - table = self._make_one(self.TABLE_NAME, None) - - transport = table._get_transport(client) - - self.assertIs(transport, mock.sentinel.http) - - @staticmethod - def _mock_requests_response(status_code, headers, content=b''): - return mock.Mock( - content=content, headers=headers, status_code=status_code, - 
spec=['content', 'headers', 'status_code']) - - def _mock_transport(self, status_code, headers, content=b''): - fake_transport = mock.Mock(spec=['request']) - fake_response = self._mock_requests_response( - status_code, headers, content=content) - fake_transport.request.return_value = fake_response - return fake_transport - - def _initiate_resumable_upload_helper(self, num_retries=None): - from google.resumable_media.requests import ResumableUpload - from google.cloud.bigquery.table import _DEFAULT_CHUNKSIZE - from google.cloud.bigquery.table import _GENERIC_CONTENT_TYPE - from google.cloud.bigquery.table import _get_upload_headers - from google.cloud.bigquery.table import _get_upload_metadata - - connection = _Connection() - client = _Client(self.PROJECT, connection=connection) - dataset = _Dataset(client) - table = self._make_one(self.TABLE_NAME, dataset) - - # Create mocks to be checked for doing transport. - resumable_url = 'http://test.invalid?upload_id=hey-you' - response_headers = {'location': resumable_url} - fake_transport = self._mock_transport( - http_client.OK, response_headers) - client._http = fake_transport - - # Create some mock arguments and call the method under test. - data = b'goodbye gudbi gootbee' - stream = io.BytesIO(data) - metadata = _get_upload_metadata( - 'CSV', table._schema, table._dataset, table.name) - upload, transport = table._initiate_resumable_upload( - client, stream, metadata, num_retries) - - # Check the returned values. - self.assertIsInstance(upload, ResumableUpload) - upload_url = ( - 'https://www.googleapis.com/upload/bigquery/v2/projects/' + - self.PROJECT + - '/jobs?uploadType=resumable') - self.assertEqual(upload.upload_url, upload_url) - expected_headers = _get_upload_headers(connection.USER_AGENT) - self.assertEqual(upload._headers, expected_headers) - self.assertFalse(upload.finished) - self.assertEqual(upload._chunk_size, _DEFAULT_CHUNKSIZE) - self.assertIs(upload._stream, stream) - self.assertIsNone(upload._total_bytes) - self.assertEqual(upload._content_type, _GENERIC_CONTENT_TYPE) - self.assertEqual(upload.resumable_url, resumable_url) - - retry_strategy = upload._retry_strategy - self.assertEqual(retry_strategy.max_sleep, 64.0) - if num_retries is None: - self.assertEqual(retry_strategy.max_cumulative_retry, 600.0) - self.assertIsNone(retry_strategy.max_retries) - else: - self.assertIsNone(retry_strategy.max_cumulative_retry) - self.assertEqual(retry_strategy.max_retries, num_retries) - self.assertIs(transport, fake_transport) - # Make sure we never read from the stream. - self.assertEqual(stream.tell(), 0) - - # Check the mocks. - request_headers = expected_headers.copy() - request_headers['x-upload-content-type'] = _GENERIC_CONTENT_TYPE - fake_transport.request.assert_called_once_with( - 'POST', - upload_url, - data=json.dumps(metadata).encode('utf-8'), - headers=request_headers, - ) - - def test__initiate_resumable_upload(self): - self._initiate_resumable_upload_helper() - - def test__initiate_resumable_upload_with_retry(self): - self._initiate_resumable_upload_helper(num_retries=11) - - def _do_multipart_upload_success_helper( - self, get_boundary, num_retries=None): - from google.cloud.bigquery.table import _get_upload_headers - from google.cloud.bigquery.table import _get_upload_metadata - - connection = _Connection() - client = _Client(self.PROJECT, connection=connection) - dataset = _Dataset(client) - table = self._make_one(self.TABLE_NAME, dataset) - - # Create mocks to be checked for doing transport. 
- fake_transport = self._mock_transport(http_client.OK, {}) - client._http = fake_transport - - # Create some mock arguments. - data = b'Bzzzz-zap \x00\x01\xf4' - stream = io.BytesIO(data) - metadata = _get_upload_metadata( - 'CSV', table._schema, table._dataset, table.name) - size = len(data) - response = table._do_multipart_upload( - client, stream, metadata, size, num_retries) - - # Check the mocks and the returned value. - self.assertIs(response, fake_transport.request.return_value) - self.assertEqual(stream.tell(), size) - get_boundary.assert_called_once_with() - - upload_url = ( - 'https://www.googleapis.com/upload/bigquery/v2/projects/' + - self.PROJECT + - '/jobs?uploadType=multipart') - payload = ( - b'--==0==\r\n' + - b'content-type: application/json; charset=UTF-8\r\n\r\n' + - json.dumps(metadata).encode('utf-8') + b'\r\n' + - b'--==0==\r\n' + - b'content-type: */*\r\n\r\n' + - data + b'\r\n' + - b'--==0==--') - headers = _get_upload_headers(connection.USER_AGENT) - headers['content-type'] = b'multipart/related; boundary="==0=="' - fake_transport.request.assert_called_once_with( - 'POST', - upload_url, - data=payload, - headers=headers, - ) - - @mock.patch(u'google.resumable_media._upload.get_boundary', - return_value=b'==0==') - def test__do_multipart_upload(self, get_boundary): - self._do_multipart_upload_success_helper(get_boundary) - - @mock.patch(u'google.resumable_media._upload.get_boundary', - return_value=b'==0==') - def test__do_multipart_upload_with_retry(self, get_boundary): - self._do_multipart_upload_success_helper(get_boundary, num_retries=8) - - -class TestTableUpload(object): - # NOTE: This is a "partner" to `TestTable` meant to test some of the - # "upload" portions of `Table`. It also uses `pytest`-style tests - # rather than `unittest`-style. 
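# The "partner" tests that follow stub out Table's private upload helpers
# rather than the HTTP transport.  A minimal, self-contained sketch of that
# ``mock.patch.object`` pattern, using a hypothetical ``_SketchUploader``
# class that merely stands in for ``Table`` (it is not part of this change):

import mock


class _SketchUploader(object):

    def _do_resumable_upload(self, stream):
        raise NotImplementedError  # a real transport call would live here

    def upload(self, stream):
        # The high-level method under test simply delegates to the helper.
        return self._do_resumable_upload(stream)


def _sketch_patch_object_pattern():
    uploader = _SketchUploader()
    with mock.patch.object(
            uploader, '_do_resumable_upload', autospec=True) as fake_upload:
        uploader.upload('fake-stream')
    # The patched helper, not the network, records the call.
    fake_upload.assert_called_once_with('fake-stream')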
- - @staticmethod - def _make_table(transport=None): - from google.cloud.bigquery import _http - from google.cloud.bigquery import client - from google.cloud.bigquery import dataset - from google.cloud.bigquery import table - - connection = mock.create_autospec(_http.Connection, instance=True) - client = mock.create_autospec(client.Client, instance=True) - client._connection = connection - client._credentials = mock.sentinel.credentials - client._http = transport - client.project = 'project_id' - - dataset = dataset.Dataset('test_dataset', client) - table = table.Table('test_table', dataset) - - return table - - @staticmethod - def _make_response(status_code, content='', headers={}): - """Make a mock HTTP response.""" - import requests - response = requests.Response() - response.request = requests.Request( - 'POST', 'http://example.com').prepare() - response._content = content.encode('utf-8') - response.headers.update(headers) - response.status_code = status_code - return response - - @classmethod - def _make_do_upload_patch(cls, table, method, side_effect=None): - """Patches the low-level upload helpers.""" - if side_effect is None: - side_effect = [cls._make_response( - http_client.OK, - json.dumps({}), - {'Content-Type': 'application/json'})] - return mock.patch.object( - table, method, side_effect=side_effect, autospec=True) - - EXPECTED_CONFIGURATION = { - 'configuration': { - 'load': { - 'sourceFormat': 'CSV', - 'destinationTable': { - 'projectId': 'project_id', - 'datasetId': 'test_dataset', - 'tableId': 'test_table' - } - } - } - } - - @staticmethod - def _make_file_obj(): - return io.BytesIO(b'hello, is it me you\'re looking for?') - - # High-level tests - - def test_upload_from_file_resumable(self): - import google.cloud.bigquery.table - - table = self._make_table() - file_obj = self._make_file_obj() - - do_upload_patch = self._make_do_upload_patch( - table, '_do_resumable_upload') - with do_upload_patch as do_upload: - table.upload_from_file(file_obj, source_format='CSV') - - do_upload.assert_called_once_with( - table._dataset._client, - file_obj, - self.EXPECTED_CONFIGURATION, - google.cloud.bigquery.table._DEFAULT_NUM_RETRIES) - - def test_upload_file_resumable_metadata(self): - table = self._make_table() - file_obj = self._make_file_obj() - - config_args = { - 'source_format': 'CSV', - 'allow_jagged_rows': False, - 'allow_quoted_newlines': False, - 'create_disposition': 'CREATE_IF_NEEDED', - 'encoding': 'utf8', - 'field_delimiter': ',', - 'ignore_unknown_values': False, - 'max_bad_records': 0, - 'quote_character': '"', - 'skip_leading_rows': 1, - 'write_disposition': 'WRITE_APPEND', - 'job_name': 'oddjob', - 'null_marker': r'\N', - } - - expected_config = { - 'configuration': { - 'load': { - 'sourceFormat': config_args['source_format'], - 'destinationTable': { - 'projectId': table._dataset._client.project, - 'datasetId': table.dataset_name, - 'tableId': table.name, - }, - 'allowJaggedRows': config_args['allow_jagged_rows'], - 'allowQuotedNewlines': - config_args['allow_quoted_newlines'], - 'createDisposition': config_args['create_disposition'], - 'encoding': config_args['encoding'], - 'fieldDelimiter': config_args['field_delimiter'], - 'ignoreUnknownValues': - config_args['ignore_unknown_values'], - 'maxBadRecords': config_args['max_bad_records'], - 'quote': config_args['quote_character'], - 'skipLeadingRows': config_args['skip_leading_rows'], - 'writeDisposition': config_args['write_disposition'], - 'jobReference': {'jobId': config_args['job_name']}, - 'nullMarker': 
config_args['null_marker'], - }, - }, - } - - do_upload_patch = self._make_do_upload_patch( - table, '_do_resumable_upload') - with do_upload_patch as do_upload: - table.upload_from_file( - file_obj, **config_args) - - do_upload.assert_called_once_with( - table._dataset._client, - file_obj, - expected_config, - mock.ANY) - - def test_upload_from_file_multipart(self): - import google.cloud.bigquery.table - - table = self._make_table() - file_obj = self._make_file_obj() - file_obj_size = 10 - - do_upload_patch = self._make_do_upload_patch( - table, '_do_multipart_upload') - with do_upload_patch as do_upload: - table.upload_from_file( - file_obj, source_format='CSV', size=file_obj_size) - - do_upload.assert_called_once_with( - table._dataset._client, - file_obj, - self.EXPECTED_CONFIGURATION, - file_obj_size, - google.cloud.bigquery.table._DEFAULT_NUM_RETRIES) - - def test_upload_from_file_with_retries(self): - table = self._make_table() - file_obj = self._make_file_obj() - num_retries = 20 - - do_upload_patch = self._make_do_upload_patch( - table, '_do_resumable_upload') - with do_upload_patch as do_upload: - table.upload_from_file( - file_obj, source_format='CSV', num_retries=num_retries) - - do_upload.assert_called_once_with( - table._dataset._client, - file_obj, - self.EXPECTED_CONFIGURATION, - num_retries) - - def test_upload_from_file_with_rewind(self): - table = self._make_table() - file_obj = self._make_file_obj() - file_obj.seek(2) - - with self._make_do_upload_patch(table, '_do_resumable_upload'): - table.upload_from_file( - file_obj, source_format='CSV', rewind=True) - - assert file_obj.tell() == 0 - - def test_upload_from_file_failure(self): - from google.resumable_media import InvalidResponse - from google.cloud import exceptions - - table = self._make_table() - file_obj = self._make_file_obj() - - response = self._make_response( - content='Someone is already in this spot.', - status_code=http_client.CONFLICT) - - do_upload_patch = self._make_do_upload_patch( - table, '_do_resumable_upload', - side_effect=InvalidResponse(response)) - - with do_upload_patch, pytest.raises(exceptions.Conflict) as exc_info: - table.upload_from_file( - file_obj, source_format='CSV', rewind=True) - - assert response.text in exc_info.value.message - assert exc_info.value.errors == [] - - def test_upload_from_file_bad_mode(self): - table = self._make_table() - file_obj = mock.Mock(spec=['mode']) - file_obj.mode = 'x' - - with pytest.raises(ValueError): - table.upload_from_file( - file_obj, source_format='CSV',) - - # Low-level tests - - @classmethod - def _make_resumable_upload_responses(cls, size): - """Make a series of responses for a successful resumable upload.""" - from google import resumable_media - - resumable_url = 'http://test.invalid?upload_id=and-then-there-was-1' - initial_response = cls._make_response( - http_client.OK, '', {'location': resumable_url}) - data_response = cls._make_response( - resumable_media.PERMANENT_REDIRECT, - '', {'range': 'bytes=0-{:d}'.format(size - 1)}) - final_response = cls._make_response( - http_client.OK, - json.dumps({'size': size}), - {'Content-Type': 'application/json'}) - return [initial_response, data_response, final_response] - - @staticmethod - def _make_transport(responses=None): - import google.auth.transport.requests - - transport = mock.create_autospec( - google.auth.transport.requests.AuthorizedSession, instance=True) - transport.request.side_effect = responses - return transport - - def test__do_resumable_upload(self): - file_obj = 
self._make_file_obj() - file_obj_len = len(file_obj.getvalue()) - transport = self._make_transport( - self._make_resumable_upload_responses(file_obj_len)) - table = self._make_table(transport) - - result = table._do_resumable_upload( - table._dataset._client, - file_obj, - self.EXPECTED_CONFIGURATION, - None) - - content = result.content.decode('utf-8') - assert json.loads(content) == {'size': file_obj_len} - - # Verify that configuration data was passed in with the initial - # request. - transport.request.assert_any_call( - 'POST', - mock.ANY, - data=json.dumps(self.EXPECTED_CONFIGURATION).encode('utf-8'), - headers=mock.ANY) - - def test__do_multipart_upload(self): - transport = self._make_transport([self._make_response(http_client.OK)]) - table = self._make_table(transport) - file_obj = self._make_file_obj() - file_obj_len = len(file_obj.getvalue()) - - table._do_multipart_upload( - table._dataset._client, - file_obj, - self.EXPECTED_CONFIGURATION, - file_obj_len, - None) - - # Verify that configuration data was passed in with the initial - # request. - request_args = transport.request.mock_calls[0][2] - request_data = request_args['data'].decode('utf-8') - request_headers = request_args['headers'] - - request_content = email.message_from_string( - 'Content-Type: {}\r\n{}'.format( - request_headers['content-type'].decode('utf-8'), - request_data)) - - # There should be two payloads: the configuration and the binary daya. - configuration_data = request_content.get_payload(0).get_payload() - binary_data = request_content.get_payload(1).get_payload() - - assert json.loads(configuration_data) == self.EXPECTED_CONFIGURATION - assert binary_data.encode('utf-8') == file_obj.getvalue() - - def test__do_multipart_upload_wrong_size(self): - table = self._make_table() - file_obj = self._make_file_obj() - file_obj_len = len(file_obj.getvalue()) - - with pytest.raises(ValueError): - table._do_multipart_upload( - table._dataset._client, - file_obj, - {}, - file_obj_len+1, - None) - - -class Test_parse_schema_resource(unittest.TestCase, _SchemaBase): - - def _call_fut(self, resource): - from google.cloud.bigquery.table import _parse_schema_resource - - return _parse_schema_resource(resource) - - def _makeResource(self): - return { - 'schema': {'fields': [ - {'name': 'full_name', 'type': 'STRING', 'mode': 'REQUIRED'}, - {'name': 'age', 'type': 'INTEGER', 'mode': 'REQUIRED'}, - ]}, - } - - def test__parse_schema_resource_defaults(self): - RESOURCE = self._makeResource() - schema = self._call_fut(RESOURCE['schema']) - self._verifySchema(schema, RESOURCE) - - def test__parse_schema_resource_subfields(self): - RESOURCE = self._makeResource() - RESOURCE['schema']['fields'].append( - {'name': 'phone', - 'type': 'RECORD', - 'mode': 'REPEATED', - 'fields': [{'name': 'type', - 'type': 'STRING', - 'mode': 'REQUIRED'}, - {'name': 'number', - 'type': 'STRING', - 'mode': 'REQUIRED'}]}) - schema = self._call_fut(RESOURCE['schema']) - self._verifySchema(schema, RESOURCE) - - def test__parse_schema_resource_fields_without_mode(self): - RESOURCE = self._makeResource() - RESOURCE['schema']['fields'].append( - {'name': 'phone', - 'type': 'STRING'}) - - schema = self._call_fut(RESOURCE['schema']) - self._verifySchema(schema, RESOURCE) - - -class Test_build_schema_resource(unittest.TestCase, _SchemaBase): - - def _call_fut(self, resource): - from google.cloud.bigquery.table import _build_schema_resource - - return _build_schema_resource(resource) - - def test_defaults(self): - from google.cloud.bigquery.table import 
SchemaField - - full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') - age = SchemaField('age', 'INTEGER', mode='REQUIRED') - resource = self._call_fut([full_name, age]) - self.assertEqual(len(resource), 2) - self.assertEqual(resource[0], - {'name': 'full_name', - 'type': 'STRING', - 'mode': 'REQUIRED'}) - self.assertEqual(resource[1], - {'name': 'age', - 'type': 'INTEGER', - 'mode': 'REQUIRED'}) - - def test_w_description(self): - from google.cloud.bigquery.table import SchemaField - - DESCRIPTION = 'DESCRIPTION' - full_name = SchemaField('full_name', 'STRING', mode='REQUIRED', - description=DESCRIPTION) - age = SchemaField('age', 'INTEGER', mode='REQUIRED') - resource = self._call_fut([full_name, age]) - self.assertEqual(len(resource), 2) - self.assertEqual(resource[0], - {'name': 'full_name', - 'type': 'STRING', - 'mode': 'REQUIRED', - 'description': DESCRIPTION}) - self.assertEqual(resource[1], - {'name': 'age', - 'type': 'INTEGER', - 'mode': 'REQUIRED'}) - - def test_w_subfields(self): - from google.cloud.bigquery.table import SchemaField - - full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') - ph_type = SchemaField('type', 'STRING', 'REQUIRED') - ph_num = SchemaField('number', 'STRING', 'REQUIRED') - phone = SchemaField('phone', 'RECORD', mode='REPEATED', - fields=[ph_type, ph_num]) - resource = self._call_fut([full_name, phone]) - self.assertEqual(len(resource), 2) - self.assertEqual(resource[0], - {'name': 'full_name', - 'type': 'STRING', - 'mode': 'REQUIRED'}) - self.assertEqual(resource[1], - {'name': 'phone', - 'type': 'RECORD', - 'mode': 'REPEATED', - 'fields': [{'name': 'type', - 'type': 'STRING', - 'mode': 'REQUIRED'}, - {'name': 'number', - 'type': 'STRING', - 'mode': 'REQUIRED'}]}) - - -class Test__get_upload_metadata(unittest.TestCase): - - @staticmethod - def _call_fut(source_format, schema, dataset, name): - from google.cloud.bigquery.table import _get_upload_metadata - - return _get_upload_metadata(source_format, schema, dataset, name) - - def test_empty_schema(self): - source_format = 'AVRO' - dataset = mock.Mock(project='prediction', spec=['name', 'project']) - dataset.name = 'market' # mock.Mock() treats `name` specially. - table_name = 'chairs' - metadata = self._call_fut(source_format, [], dataset, table_name) - - expected = { - 'configuration': { - 'load': { - 'sourceFormat': source_format, - 'destinationTable': { - 'projectId': dataset.project, - 'datasetId': dataset.name, - 'tableId': table_name, - }, - }, - }, - } - self.assertEqual(metadata, expected) - - def test_with_schema(self): - from google.cloud.bigquery.table import SchemaField - - source_format = 'CSV' - full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') - dataset = mock.Mock(project='blind', spec=['name', 'project']) - dataset.name = 'movie' # mock.Mock() treats `name` specially. 
- table_name = 'teebull-neem' - metadata = self._call_fut( - source_format, [full_name], dataset, table_name) - - expected = { - 'configuration': { - 'load': { - 'sourceFormat': source_format, - 'destinationTable': { - 'projectId': dataset.project, - 'datasetId': dataset.name, - 'tableId': table_name, - }, - 'schema': { - 'fields': [ - { - 'name': full_name.name, - 'type': full_name.field_type, - 'mode': full_name.mode, - }, - ], - }, - }, - }, - } - self.assertEqual(metadata, expected) - - -class _Client(object): - - _query_results = () - - def __init__(self, project='project', connection=None): - self.project = project - self._connection = connection - - def run_sync_query(self, query): - return _Query(query, self) - - -class _Query(object): - - def __init__(self, query, client): - self.query = query - self.rows = [] - self.client = client - - def run(self): - self.rows = self.client._query_results - - -class _Dataset(object): - - def __init__(self, client, name=TestTable.DS_NAME): - self._client = client - self.name = name - - @property - def path(self): - return '/projects/%s/datasets/%s' % ( - self._client.project, self.name) - - @property - def project(self): - return self._client.project - - -class _Connection(object): - - API_BASE_URL = 'http://example.com' - USER_AGENT = 'testing 1.2.3' - - def __init__(self, *responses): - self._responses = responses[:] - self._requested = [] - - def api_request(self, **kw): - from google.cloud.exceptions import NotFound - - self._requested.append(kw) - - try: - response, self._responses = self._responses[0], self._responses[1:] - except IndexError: - raise NotFound('miss') - else: - return response diff --git a/bigtable/google/cloud/bigtable/table.py b/bigtable/google/cloud/bigtable/table.py index aaec98b6265b..d1711f5be704 100644 --- a/bigtable/google/cloud/bigtable/table.py +++ b/bigtable/google/cloud/bigtable/table.py @@ -113,7 +113,7 @@ def row(self, row_key, filter_=None, append=False): .. warning:: At most one of ``filter_`` and ``append`` can be used in a - :class:`Row`. + :class:`.Row`. :type row_key: bytes :param row_key: The key for the row being created. diff --git a/docs/bigquery/client.rst b/docs/bigquery/client.rst deleted file mode 100644 index 42d4ed8e082a..000000000000 --- a/docs/bigquery/client.rst +++ /dev/null @@ -1,6 +0,0 @@ -Client -====== - -.. automodule:: google.cloud.bigquery.client - :members: - :show-inheritance: diff --git a/docs/bigquery/dataset.rst b/docs/bigquery/dataset.rst deleted file mode 100644 index dd1d05352918..000000000000 --- a/docs/bigquery/dataset.rst +++ /dev/null @@ -1,6 +0,0 @@ -Datasets -~~~~~~~~ - -.. automodule:: google.cloud.bigquery.dataset - :members: - :show-inheritance: diff --git a/docs/bigquery/dbapi.rst b/docs/bigquery/dbapi.rst new file mode 100644 index 000000000000..ca0256d3c8de --- /dev/null +++ b/docs/bigquery/dbapi.rst @@ -0,0 +1,6 @@ +DB-API Reference +~~~~~~~~~~~~~~~~ + +.. automodule:: google.cloud.bigquery.dbapi + :members: + :show-inheritance: diff --git a/docs/bigquery/job.rst b/docs/bigquery/job.rst deleted file mode 100644 index 6ab5339fe166..000000000000 --- a/docs/bigquery/job.rst +++ /dev/null @@ -1,7 +0,0 @@ -Jobs -~~~~ - -.. automodule:: google.cloud.bigquery.job - :members: - :inherited-members: - :show-inheritance: diff --git a/docs/bigquery/query.rst b/docs/bigquery/query.rst deleted file mode 100644 index d8b9da09cde0..000000000000 --- a/docs/bigquery/query.rst +++ /dev/null @@ -1,6 +0,0 @@ -Query -~~~~~ - -.. 
automodule:: google.cloud.bigquery.query - :members: - :show-inheritance: diff --git a/docs/bigquery/reference.rst b/docs/bigquery/reference.rst new file mode 100644 index 000000000000..74209a95ea0d --- /dev/null +++ b/docs/bigquery/reference.rst @@ -0,0 +1,6 @@ +API Reference +~~~~~~~~~~~~~ + +.. automodule:: google.cloud.bigquery + :members: + :show-inheritance: diff --git a/docs/bigquery/schema.rst b/docs/bigquery/schema.rst deleted file mode 100644 index 2a3cc254a561..000000000000 --- a/docs/bigquery/schema.rst +++ /dev/null @@ -1,6 +0,0 @@ -Schemas -~~~~~~~ - -.. automodule:: google.cloud.bigquery.schema - :members: - :show-inheritance: diff --git a/docs/bigquery/snippets.py b/docs/bigquery/snippets.py index 6f1eb29e392d..3ae8486ade9e 100644 --- a/docs/bigquery/snippets.py +++ b/docs/bigquery/snippets.py @@ -23,45 +23,53 @@ need to be deleted during teardown. """ -import operator import time import pytest import six -from google.cloud.bigquery import SchemaField -from google.cloud.bigquery.client import Client +from google.cloud import bigquery ORIGINAL_FRIENDLY_NAME = 'Original friendly name' ORIGINAL_DESCRIPTION = 'Original description' LOCALLY_CHANGED_FRIENDLY_NAME = 'Locally-changed friendly name' LOCALLY_CHANGED_DESCRIPTION = 'Locally-changed description' -PATCHED_FRIENDLY_NAME = 'Patched friendly name' -PATCHED_DESCRIPTION = 'Patched description' UPDATED_FRIENDLY_NAME = 'Updated friendly name' UPDATED_DESCRIPTION = 'Updated description' SCHEMA = [ - SchemaField('full_name', 'STRING', mode='required'), - SchemaField('age', 'INTEGER', mode='required'), + bigquery.SchemaField('full_name', 'STRING', mode='required'), + bigquery.SchemaField('age', 'INTEGER', mode='required'), +] + +ROWS = [ + ('Phred Phlyntstone', 32), + ('Bharney Rhubble', 33), + ('Wylma Phlyntstone', 29), + ('Bhettye Rhubble', 27), ] QUERY = ( - 'SELECT name FROM [bigquery-public-data:usa_names.usa_1910_2013] ' + 'SELECT name FROM `bigquery-public-data.usa_names.usa_1910_2013` ' 'WHERE state = "TX"') @pytest.fixture(scope='module') def client(): - return Client() + return bigquery.Client() @pytest.fixture -def to_delete(): +def to_delete(client): doomed = [] yield doomed for item in doomed: - item.delete() + if isinstance(item, bigquery.Dataset): + client.delete_dataset(item) + elif isinstance(item, bigquery.Table): + client.delete_table(item) + else: + item.delete() def _millis(): @@ -89,476 +97,542 @@ def do_something_with(_): # [END client_list_datasets] -def test_dataset_create(client, to_delete): +def test_create_dataset(client, to_delete): """Create a dataset.""" - DATASET_NAME = 'dataset_create_%d' % (_millis(),) - - # [START dataset_create] - dataset = client.dataset(DATASET_NAME) - dataset.create() # API request - # [END dataset_create] - - to_delete.append(dataset) + DATASET_ID = 'create_dataset_%d' % (_millis(),) + # [START create_dataset] + # DATASET_ID = 'dataset_ids_are_strings' + dataset_ref = client.dataset(DATASET_ID) + dataset = bigquery.Dataset(dataset_ref) + dataset.description = 'my dataset' + dataset = client.create_dataset(dataset) # API request + # [END create_dataset] -def test_dataset_exists(client, to_delete): - """Test existence of a dataset.""" - DATASET_NAME = 'dataset_exists_%d' % (_millis(),) - dataset = client.dataset(DATASET_NAME) to_delete.append(dataset) - # [START dataset_exists] - assert not dataset.exists() # API request - dataset.create() # API request - assert dataset.exists() # API request - # [END dataset_exists] - -def test_dataset_reload(client, to_delete): +def 
test_get_dataset(client, to_delete): """Reload a dataset's metadata.""" - DATASET_NAME = 'dataset_reload_%d' % (_millis(),) - dataset = client.dataset(DATASET_NAME) + DATASET_ID = 'get_dataset_%d' % (_millis(),) + dataset_ref = client.dataset(DATASET_ID) + dataset = bigquery.Dataset(dataset_ref) dataset.description = ORIGINAL_DESCRIPTION - dataset.create() + dataset = client.create_dataset(dataset) # API request to_delete.append(dataset) - # [START dataset_reload] + # [START get_dataset] assert dataset.description == ORIGINAL_DESCRIPTION dataset.description = LOCALLY_CHANGED_DESCRIPTION assert dataset.description == LOCALLY_CHANGED_DESCRIPTION - dataset.reload() # API request + dataset = client.get_dataset(dataset) # API request assert dataset.description == ORIGINAL_DESCRIPTION - # [END dataset_reload] + # [END get_dataset] -def test_dataset_patch(client, to_delete): - """Patch a dataset's metadata.""" - DATASET_NAME = 'dataset_patch_%d' % (_millis(),) - dataset = client.dataset(DATASET_NAME) +def test_update_dataset_simple(client, to_delete): + """Update a dataset's metadata.""" + DATASET_ID = 'update_dataset_simple_%d' % (_millis(),) + dataset = bigquery.Dataset(client.dataset(DATASET_ID)) dataset.description = ORIGINAL_DESCRIPTION - dataset.create() + client.create_dataset(dataset) to_delete.append(dataset) - # [START dataset_patch] - ONE_DAY_MS = 24 * 60 * 60 * 1000 + # [START update_dataset_simple] assert dataset.description == ORIGINAL_DESCRIPTION - dataset.patch( - description=PATCHED_DESCRIPTION, - default_table_expiration_ms=ONE_DAY_MS - ) # API request - assert dataset.description == PATCHED_DESCRIPTION - assert dataset.default_table_expiration_ms == ONE_DAY_MS - # [END dataset_patch] + dataset.description = UPDATED_DESCRIPTION + + dataset = client.update_dataset(dataset, ['description']) # API request + + assert dataset.description == UPDATED_DESCRIPTION + # [END update_dataset_simple] -def test_dataset_update(client, to_delete): +def test_update_dataset_multiple_properties(client, to_delete): """Update a dataset's metadata.""" - DATASET_NAME = 'dataset_update_%d' % (_millis(),) - dataset = client.dataset(DATASET_NAME) + DATASET_ID = 'update_dataset_multiple_properties_%d' % (_millis(),) + dataset = bigquery.Dataset(client.dataset(DATASET_ID)) dataset.description = ORIGINAL_DESCRIPTION - dataset.create() + dataset = client.create_dataset(dataset) to_delete.append(dataset) - dataset.reload() - # [START dataset_update] - from google.cloud.bigquery import AccessGrant + # [START update_dataset_multiple_properties] assert dataset.description == ORIGINAL_DESCRIPTION assert dataset.default_table_expiration_ms is None - grant = AccessGrant( + entry = bigquery.AccessEntry( role='READER', entity_type='domain', entity_id='example.com') - assert grant not in dataset.access_grants - ONE_DAY_MS = 24 * 60 * 60 * 1000 + assert entry not in dataset.access_entries + ONE_DAY_MS = 24 * 60 * 60 * 1000 # in milliseconds dataset.description = UPDATED_DESCRIPTION dataset.default_table_expiration_ms = ONE_DAY_MS - grants = list(dataset.access_grants) - grants.append(grant) - dataset.access_grants = grants - dataset.update() # API request + entries = list(dataset.access_entries) + entries.append(entry) + dataset.access_entries = entries + + dataset = client.update_dataset( + dataset, + ['description', 'default_table_expiration_ms', 'access_entries'] + ) # API request + assert dataset.description == UPDATED_DESCRIPTION assert dataset.default_table_expiration_ms == ONE_DAY_MS - assert grant in 
dataset.access_grants - # [END dataset_update] + assert entry in dataset.access_entries + # [END update_dataset_multiple_properties] -def test_dataset_delete(client): +def test_delete_dataset(client): """Delete a dataset.""" - DATASET_NAME = 'dataset_delete_%d' % (_millis(),) - dataset = client.dataset(DATASET_NAME) - dataset.create() + DATASET_ID = 'delete_dataset_%d' % (_millis(),) + dataset = bigquery.Dataset(client.dataset(DATASET_ID)) + client.create_dataset(dataset) + + # [START delete_dataset] + from google.cloud.exceptions import NotFound - # [START dataset_delete] - assert dataset.exists() # API request - dataset.delete() - assert not dataset.exists() # API request - # [END dataset_delete] + client.delete_dataset(dataset) # API request + with pytest.raises(NotFound): + client.get_dataset(dataset) # API request + # [END delete_dataset] -def test_dataset_list_tables(client, to_delete): + +def test_list_dataset_tables(client, to_delete): """List tables within a dataset.""" - DATASET_NAME = 'dataset_list_tables_dataset_%d' % (_millis(),) - TABLE_NAME = 'dataset_list_tables_table_%d' % (_millis(),) - dataset = client.dataset(DATASET_NAME) - dataset.create() + DATASET_ID = 'list_dataset_tables_dataset_%d' % (_millis(),) + dataset = bigquery.Dataset(client.dataset(DATASET_ID)) + dataset = client.create_dataset(dataset) to_delete.append(dataset) - # [START dataset_list_tables] - tables = list(dataset.list_tables()) # API request(s) + # [START list_dataset_tables] + tables = list(client.list_dataset_tables(dataset)) # API request(s) assert len(tables) == 0 - table = dataset.table(TABLE_NAME) + + table_ref = dataset.table('my_table') + table = bigquery.Table(table_ref) table.view_query = QUERY - table.create() # API request - tables = list(dataset.list_tables()) # API request(s) + client.create_table(table) # API request + tables = list(client.list_dataset_tables(dataset)) # API request(s) + assert len(tables) == 1 - assert tables[0].name == TABLE_NAME - # [END dataset_list_tables] + assert tables[0].table_id == 'my_table' + # [END list_dataset_tables] + to_delete.insert(0, table) -def test_table_create(client, to_delete): +def test_create_table(client, to_delete): """Create a table.""" - DATASET_NAME = 'table_create_dataset_%d' % (_millis(),) - TABLE_NAME = 'table_create_table_%d' % (_millis(),) - dataset = client.dataset(DATASET_NAME) - dataset.create() + DATASET_ID = 'create_table_dataset_%d' % (_millis(),) + dataset = bigquery.Dataset(client.dataset(DATASET_ID)) + client.create_dataset(dataset) to_delete.append(dataset) - # [START table_create] - table = dataset.table(TABLE_NAME, SCHEMA) - table.create() # API request - # [END table_create] - - to_delete.insert(0, table) - - -def test_table_exists(client, to_delete): - """Test existence of a table.""" - DATASET_NAME = 'table_exists_dataset_%d' % (_millis(),) - TABLE_NAME = 'table_exists_table_%d' % (_millis(),) - dataset = client.dataset(DATASET_NAME) - dataset.create() - to_delete.append(dataset) + # [START create_table] + SCHEMA = [ + bigquery.SchemaField('full_name', 'STRING', mode='required'), + bigquery.SchemaField('age', 'INTEGER', mode='required'), + ] + table_ref = dataset.table('my_table') + table = bigquery.Table(table_ref, schema=SCHEMA) + table = client.create_table(table) # API request - # [START table_exists] - table = dataset.table(TABLE_NAME, SCHEMA) - assert not table.exists() # API request - table.create() # API request - assert table.exists() # API request - # [END table_exists] + assert table.table_id == 'my_table' + 
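    # The Table returned by create_table reflects the server's copy of the
    # resource, including any server-assigned metadata, so it can be reused
    # directly for later API calls.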
# [END create_table] to_delete.insert(0, table) -def test_table_reload(client, to_delete): +def test_get_table(client, to_delete): """Reload a table's metadata.""" - DATASET_NAME = 'table_reload_dataset_%d' % (_millis(),) - TABLE_NAME = 'table_reload_table_%d' % (_millis(),) - dataset = client.dataset(DATASET_NAME) - dataset.create() + DATASET_ID = 'get_table_dataset_%d' % (_millis(),) + TABLE_ID = 'get_table_table_%d' % (_millis(),) + dataset = bigquery.Dataset(client.dataset(DATASET_ID)) + dataset = client.create_dataset(dataset) to_delete.append(dataset) - table = dataset.table(TABLE_NAME, SCHEMA) - table.friendly_name = ORIGINAL_FRIENDLY_NAME + table = bigquery.Table(dataset.table(TABLE_ID), schema=SCHEMA) table.description = ORIGINAL_DESCRIPTION - table.create() + table = client.create_table(table) to_delete.insert(0, table) - # [START table_reload] - assert table.friendly_name == ORIGINAL_FRIENDLY_NAME + # [START get_table] assert table.description == ORIGINAL_DESCRIPTION - table.friendly_name = LOCALLY_CHANGED_FRIENDLY_NAME table.description = LOCALLY_CHANGED_DESCRIPTION - table.reload() # API request - assert table.friendly_name == ORIGINAL_FRIENDLY_NAME + table = client.get_table(table) # API request assert table.description == ORIGINAL_DESCRIPTION - # [END table_reload] + # [END get_table] -def test_table_patch(client, to_delete): +def test_update_table_simple(client, to_delete): """Patch a table's metadata.""" - DATASET_NAME = 'table_patch_dataset_%d' % (_millis(),) - TABLE_NAME = 'table_patch_table_%d' % (_millis(),) - dataset = client.dataset(DATASET_NAME) + DATASET_ID = 'update_table_simple_dataset_%d' % (_millis(),) + TABLE_ID = 'update_table_simple_table_%d' % (_millis(),) + dataset = bigquery.Dataset(client.dataset(DATASET_ID)) dataset.description = ORIGINAL_DESCRIPTION - dataset.create() + client.create_dataset(dataset) to_delete.append(dataset) - table = dataset.table(TABLE_NAME, SCHEMA) - table.friendly_name = ORIGINAL_FRIENDLY_NAME + table = bigquery.Table(dataset.table(TABLE_ID), schema=SCHEMA) table.description = ORIGINAL_DESCRIPTION - table.create() + table = client.create_table(table) to_delete.insert(0, table) - # [START table_patch] - assert table.friendly_name == ORIGINAL_FRIENDLY_NAME + # [START update_table_simple] assert table.description == ORIGINAL_DESCRIPTION - table.patch( - friendly_name=PATCHED_FRIENDLY_NAME, - description=PATCHED_DESCRIPTION, - ) # API request - assert table.friendly_name == PATCHED_FRIENDLY_NAME - assert table.description == PATCHED_DESCRIPTION - # [END table_patch] + table.description = UPDATED_DESCRIPTION + + table = client.update_table(table, ['description']) # API request + + assert table.description == UPDATED_DESCRIPTION + # [END update_table_simple] -def test_table_update(client, to_delete): +def test_update_table_multiple_properties(client, to_delete): """Update a table's metadata.""" - DATASET_NAME = 'table_update_dataset_%d' % (_millis(),) - TABLE_NAME = 'table_update_table_%d' % (_millis(),) - dataset = client.dataset(DATASET_NAME) + DATASET_ID = 'update_table_multiple_properties_dataset_%d' % (_millis(),) + TABLE_ID = 'update_table_multiple_properties_table_%d' % (_millis(),) + dataset = bigquery.Dataset(client.dataset(DATASET_ID)) dataset.description = ORIGINAL_DESCRIPTION - dataset.create() + client.create_dataset(dataset) to_delete.append(dataset) - table = dataset.table(TABLE_NAME, SCHEMA) + table = bigquery.Table(dataset.table(TABLE_ID), schema=SCHEMA) table.friendly_name = ORIGINAL_FRIENDLY_NAME table.description = 
ORIGINAL_DESCRIPTION - table.create() + table = client.create_table(table) to_delete.insert(0, table) - # [START table_update] + # [START update_table_multiple_properties] assert table.friendly_name == ORIGINAL_FRIENDLY_NAME assert table.description == ORIGINAL_DESCRIPTION - NEW_SCHEMA = table.schema[:] - NEW_SCHEMA.append(SchemaField('phone', 'string')) + + NEW_SCHEMA = list(table.schema) + NEW_SCHEMA.append(bigquery.SchemaField('phone', 'STRING')) table.friendly_name = UPDATED_FRIENDLY_NAME table.description = UPDATED_DESCRIPTION table.schema = NEW_SCHEMA - table.update() # API request + table = client.update_table( + table, + ['schema', 'friendly_name', 'description'] + ) # API request + assert table.friendly_name == UPDATED_FRIENDLY_NAME assert table.description == UPDATED_DESCRIPTION assert table.schema == NEW_SCHEMA - # [END table_update] - + # [END update_table_multiple_properties] -def _warm_up_inserted_table_data(table): - # Allow for 90 seconds of "warm up" before rows visible. See - # https://cloud.google.com/bigquery/streaming-data-into-bigquery#dataavailability - rows = () - counter = 18 - while len(rows) == 0 and counter > 0: - counter -= 1 - iterator = table.fetch_data() - page = six.next(iterator.pages) - rows = list(page) - if len(rows) == 0: - time.sleep(5) - - -def test_table_insert_fetch_data(client, to_delete): +def test_table_create_rows(client, to_delete): """Insert / fetch table data.""" - DATASET_NAME = 'table_insert_fetch_data_dataset_%d' % (_millis(),) - TABLE_NAME = 'table_insert_fetch_data_table_%d' % (_millis(),) - dataset = client.dataset(DATASET_NAME) - dataset.create() + DATASET_ID = 'table_create_rows_dataset_%d' % (_millis(),) + TABLE_ID = 'table_create_rows_table_%d' % (_millis(),) + dataset = bigquery.Dataset(client.dataset(DATASET_ID)) + dataset = client.create_dataset(dataset) to_delete.append(dataset) - table = dataset.table(TABLE_NAME, SCHEMA) - table.create() + table = bigquery.Table(dataset.table(TABLE_ID), schema=SCHEMA) + table = client.create_table(table) to_delete.insert(0, table) - # [START table_insert_data] + # [START table_create_rows] ROWS_TO_INSERT = [ (u'Phred Phlyntstone', 32), (u'Wylma Phlyntstone', 29), ] - table.insert_data(ROWS_TO_INSERT) - # [END table_insert_data] + errors = client.create_rows(table, ROWS_TO_INSERT) # API request - _warm_up_inserted_table_data(table) + assert errors == [] + # [END table_create_rows] - found_rows = [] - - def do_something(row): - found_rows.append(row) - - # [START table_fetch_data] - for row in table.fetch_data(): - do_something(row) - # [END table_fetch_data] - assert len(found_rows) == len(ROWS_TO_INSERT) - by_age = operator.itemgetter(1) - found_rows = reversed(sorted(found_rows, key=by_age)) - for found, to_insert in zip(found_rows, ROWS_TO_INSERT): - assert found == to_insert - - -def test_table_upload_from_file(client, to_delete): +def test_load_table_from_file(client, to_delete): """Upload table data from a CSV file.""" - DATASET_NAME = 'table_upload_from_file_dataset_%d' % (_millis(),) - TABLE_NAME = 'table_upload_from_file_table_%d' % (_millis(),) - dataset = client.dataset(DATASET_NAME) - dataset.create() + DATASET_ID = 'table_upload_from_file_dataset_%d' % (_millis(),) + TABLE_ID = 'table_upload_from_file_table_%d' % (_millis(),) + dataset = bigquery.Dataset(client.dataset(DATASET_ID)) + client.create_dataset(dataset) to_delete.append(dataset) - table = dataset.table(TABLE_NAME, SCHEMA) - table.create() + table_ref = dataset.table(TABLE_ID) + table = bigquery.Table(table_ref, 
schema=SCHEMA) + table = client.create_table(table) to_delete.insert(0, table) - # [START table_upload_from_file] + # [START load_table_from_file] csv_file = six.BytesIO(b"""full_name,age Phred Phlyntstone,32 Wylma Phlyntstone,29 """) - load_job = table.upload_from_file( - csv_file, source_format='CSV', skip_leading_rows=1) - load_job.result() # Wait for table load to complete. - # [END table_upload_from_file] + table_ref = dataset.table(TABLE_ID) + job_config = bigquery.LoadJobConfig() + job_config.source_format = 'CSV' + job_config.skip_leading_rows = 1 + job = client.load_table_from_file( + csv_file, table_ref, job_config=job_config) # API request + job.result() # Waits for table load to complete. + # [END load_table_from_file] + + found_rows = [] - _warm_up_inserted_table_data(table) + def do_something(row): + found_rows.append(row) - iterator = table.fetch_data() + # [START table_list_rows] + for row in client.list_rows(table): # API request + do_something(row) + # [END table_list_rows] + + assert len(found_rows) == 2 + + # [START table_list_rows_iterator_properties] + iterator = client.list_rows(table) # API request page = six.next(iterator.pages) rows = list(page) total = iterator.total_rows token = iterator.next_page_token + # [END table_list_rows_iterator_properties] + row_tuples = [r.values() for r in rows] assert len(rows) == total == 2 assert token is None - assert (u'Phred Phlyntstone', 32) in rows - assert (u'Wylma Phlyntstone', 29) in rows + assert (u'Phred Phlyntstone', 32) in row_tuples + assert (u'Wylma Phlyntstone', 29) in row_tuples -def test_table_delete(client, to_delete): - """Delete a table.""" - DATASET_NAME = 'table_delete_dataset_%d' % (_millis(),) - TABLE_NAME = 'table_create_table_%d' % (_millis(),) - dataset = client.dataset(DATASET_NAME) - dataset.create() +def test_load_table_from_uri(client, to_delete): + ROWS = [ + ('Phred Phlyntstone', 32), + ('Bharney Rhubble', 33), + ('Wylma Phlyntstone', 29), + ('Bhettye Rhubble', 27), + ] + HEADER_ROW = ('Full Name', 'Age') + bucket_name = 'gs_bq_load_test_%d' % (_millis(),) + blob_name = 'person_ages.csv' + bucket, blob = _write_csv_to_storage( + bucket_name, blob_name, HEADER_ROW, ROWS) + to_delete.extend((blob, bucket)) + DATASET_ID = 'delete_table_dataset_%d' % (_millis(),) + dataset = bigquery.Dataset(client.dataset(DATASET_ID)) + client.create_dataset(dataset) to_delete.append(dataset) - table = dataset.table(TABLE_NAME, SCHEMA) - table.create() + # [START load_table_from_uri] + table_ref = dataset.table('person_ages') + table = bigquery.Table(table_ref) + table.schema = [ + bigquery.SchemaField('full_name', 'STRING', mode='required'), + bigquery.SchemaField('age', 'INTEGER', mode='required') + ] + client.create_table(table) # API request + GS_URL = 'gs://{}/{}'.format(bucket_name, blob_name) + job_id_prefix = "my_job" + job_config = bigquery.LoadJobConfig() + job_config.create_disposition = 'NEVER' + job_config.skip_leading_rows = 1 + job_config.source_format = 'CSV' + job_config.write_disposition = 'WRITE_EMPTY' + load_job = client.load_table_from_uri( + GS_URL, table_ref, job_config=job_config, + job_id_prefix=job_id_prefix) # API request + + assert load_job.state == 'RUNNING' + assert load_job.job_type == 'load' + + load_job.result() # Waits for table load to complete. 
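    # result() polls until the job is DONE and raises if the load failed,
    # so the assertions below see the completed job.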
+ + assert load_job.state == 'DONE' + assert load_job.job_id.startswith(job_id_prefix) + # [END load_table_from_uri] - # [START table_delete] - assert table.exists() # API request - table.delete() # API request - assert not table.exists() # API request - # [END table_delete] + to_delete.insert(0, table) -def test_client_list_jobs(client): - """List jobs for a project.""" +def _write_csv_to_storage(bucket_name, blob_name, header_row, data_rows): + import csv + from google.cloud._testing import _NamedTemporaryFile + from google.cloud.storage import Client as StorageClient - def do_something_with(_): - pass + storage_client = StorageClient() - # [START client_list_jobs] - job_iterator = client.list_jobs() - for job in job_iterator: # API request(s) - do_something_with(job) - # [END client_list_jobs] + # In the **very** rare case the bucket name is reserved, this + # fails with a ConnectionError. + bucket = storage_client.create_bucket(bucket_name) + blob = bucket.blob(blob_name) -def test_client_run_sync_query(client): - """Run a synchronous query.""" - LIMIT = 100 - LIMITED = '%s LIMIT %d' % (QUERY, LIMIT) - TIMEOUT_MS = 10000 + with _NamedTemporaryFile() as temp: + with open(temp.name, 'w') as csv_write: + writer = csv.writer(csv_write) + writer.writerow(header_row) + writer.writerows(data_rows) - # [START client_run_sync_query] - query = client.run_sync_query(LIMITED) - query.timeout_ms = TIMEOUT_MS - query.run() # API request + with open(temp.name, 'rb') as csv_read: + blob.upload_from_file(csv_read, content_type='text/csv') - assert query.complete - assert len(query.rows) == LIMIT - assert [field.name for field in query.schema] == ['name'] - # [END client_run_sync_query] + return bucket, blob -def test_client_run_sync_query_w_param(client): - """Run a synchronous query using a query parameter""" - QUERY_W_PARAM = ( - 'SELECT name FROM `bigquery-public-data.usa_names.usa_1910_2013` ' - 'WHERE state = @state') - LIMIT = 100 - LIMITED = '%s LIMIT %d' % (QUERY_W_PARAM, LIMIT) - TIMEOUT_MS = 10000 +def test_copy_table(client, to_delete): + DATASET_ID = 'copy_table_dataset_%d' % (_millis(),) + # [START copy_table] + source_dataset = bigquery.DatasetReference( + 'bigquery-public-data', 'samples') + source_table_ref = source_dataset.table('shakespeare') - # [START client_run_sync_query_w_param] - from google.cloud.bigquery import ScalarQueryParameter - param = ScalarQueryParameter('state', 'STRING', 'TX') - query = client.run_sync_query(LIMITED, query_parameters=[param]) - query.use_legacy_sql = False - query.timeout_ms = TIMEOUT_MS - query.run() # API request + dest_dataset = bigquery.Dataset(client.dataset(DATASET_ID)) + dest_dataset = client.create_dataset(dest_dataset) # API request + dest_table_ref = dest_dataset.table('destination_table') - assert query.complete - assert len(query.rows) == LIMIT - assert [field.name for field in query.schema] == ['name'] - # [END client_run_sync_query_w_param] + job_config = bigquery.CopyJobConfig() + job = client.copy_table( + source_table_ref, dest_table_ref, job_config=job_config) # API request + job.result() # Waits for job to complete. 
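    # As with load jobs, result() blocks until the copy finishes and
    # surfaces any job error as an exception.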
+ assert job.state == 'DONE' + dest_table = client.get_table(dest_table_ref) # API request + assert dest_table.table_id == 'destination_table' + # [END copy_table] -def test_client_run_sync_query_paged(client): - """Run a synchronous query with paged results.""" - TIMEOUT_MS = 10000 - PAGE_SIZE = 100 - LIMIT = 1000 - LIMITED = '%s LIMIT %d' % (QUERY, LIMIT) + to_delete.append(dest_dataset) + to_delete.insert(0, dest_table) - all_rows = [] - def do_something_with(row): - all_rows.append(row) +def test_extract_table(client, to_delete): + DATASET_ID = 'export_data_dataset_%d' % (_millis(),) + dataset = bigquery.Dataset(client.dataset(DATASET_ID)) + client.create_dataset(dataset) + to_delete.append(dataset) + + table_ref = dataset.table('person_ages') + table = client.create_table(bigquery.Table(table_ref, schema=SCHEMA)) + to_delete.insert(0, table) + client.create_rows(table, ROWS) + + bucket_name = 'extract_person_ages_job_%d' % (_millis(),) + # [START extract_table] + from google.cloud.storage import Client as StorageClient + + storage_client = StorageClient() + bucket = storage_client.create_bucket(bucket_name) # API request + destination_blob_name = 'person_ages_out.csv' + destination = bucket.blob(destination_blob_name) + + destination_uri = 'gs://{}/{}'.format(bucket_name, destination_blob_name) + extract_job = client.extract_table( + table_ref, destination_uri) # API request + extract_job.result(timeout=100) # Waits for job to complete. + + got = destination.download_as_string().decode('utf-8') # API request + assert 'Bharney Rhubble' in got + # [END extract_table] + to_delete.append(bucket) + to_delete.insert(0, destination) + + +def test_delete_table(client, to_delete): + """Delete a table.""" + DATASET_ID = 'delete_table_dataset_%d' % (_millis(),) + TABLE_ID = 'delete_table_table_%d' % (_millis(),) + dataset_ref = client.dataset(DATASET_ID) + dataset = client.create_dataset(bigquery.Dataset(dataset_ref)) + to_delete.append(dataset) - # [START client_run_sync_query_paged] - query = client.run_sync_query(LIMITED) - query.timeout_ms = TIMEOUT_MS - query.max_results = PAGE_SIZE - query.run() # API request + table_ref = dataset.table(TABLE_ID) + table = bigquery.Table(table_ref, schema=SCHEMA) + client.create_table(table) + # [START delete_table] + from google.cloud.exceptions import NotFound - assert query.complete - assert query.page_token is not None - assert len(query.rows) == PAGE_SIZE - assert [field.name for field in query.schema] == ['name'] + client.delete_table(table) # API request - iterator = query.fetch_data() # API request(s) during iteration - for row in iterator: - do_something_with(row) - # [END client_run_sync_query_paged] + with pytest.raises(NotFound): + client.get_table(table) # API request + # [END delete_table] - assert iterator.total_rows == LIMIT - assert len(all_rows) == LIMIT +def test_client_query(client): + """Run a query""" -def test_client_run_sync_query_timeout(client): - """Run a synchronous query w/ timeout""" - TIMEOUT_MS = 10 + # [START client_query] + QUERY = ( + 'SELECT name FROM `bigquery-public-data.usa_names.usa_1910_2013` ' + 'WHERE state = "TX" ' + 'LIMIT 100') + TIMEOUT = 30 # in seconds + query_job = client.query(QUERY) # API request - starts the query + assert query_job.state == 'RUNNING' - all_rows = [] + # Waits for the query to finish + iterator = query_job.result(timeout=TIMEOUT) + rows = list(iterator) - def do_something_with(row): - all_rows.append(row) + assert query_job.state == 'DONE' + assert len(rows) == 100 + row = rows[0] + 
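    # Each Row supports access by index, by attribute, and by field name,
    # as the assertion below demonstrates.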
assert row[0] == row.name == row['name'] + # [END client_query] - # [START client_run_sync_query_timeout] - query = client.run_sync_query(QUERY) - query.timeout_ms = TIMEOUT_MS - query.use_query_cache = False - query.run() # API request - assert not query.complete +def test_client_query_w_param(client): + """Run a query using a query parameter""" - job = query.job - job.reload() # API rquest - retry_count = 0 + # [START client_query_w_param] + QUERY_W_PARAM = ( + 'SELECT name, state ' + 'FROM `bigquery-public-data.usa_names.usa_1910_2013` ' + 'WHERE state = @state ' + 'LIMIT 100') + TIMEOUT = 30 # in seconds + param = bigquery.ScalarQueryParameter('state', 'STRING', 'TX') + job_config = bigquery.QueryJobConfig() + job_config.query_parameters = [param] + query_job = client.query( + QUERY_W_PARAM, job_config=job_config) # API request - starts the query + assert query_job.state == 'RUNNING' + + # Waits for the query to finish + iterator = query_job.result(timeout=TIMEOUT) + rows = list(iterator) + + assert query_job.state == 'DONE' + assert len(rows) == 100 + row = rows[0] + assert row[0] == row.name == row['name'] + assert row.state == 'TX' + # [END client_query_w_param] + + +def test_client_query_rows(client): + """Run a simple query.""" + + # [START client_query_rows] + QUERY = ( + 'SELECT name FROM `bigquery-public-data.usa_names.usa_1910_2013` ' + 'WHERE state = "TX" ' + 'LIMIT 100') + TIMEOUT = 30 # in seconds + rows = list(client.query_rows(QUERY, timeout=TIMEOUT)) # API request - while retry_count < 10 and job.state != u'DONE': - time.sleep(1.5**retry_count) # exponential backoff - retry_count += 1 - job.reload() # API request + assert len(rows) == 100 + row = rows[0] + assert row[0] == row.name == row['name'] + # [END client_query_rows] - assert job.state == u'DONE' - iterator = query.fetch_data() # API request(s) during iteration - for row in iterator: - do_something_with(row) - # [END client_run_sync_query_timeout] +def test_client_list_jobs(client): + """List jobs for a project.""" - assert len(all_rows) == iterator.total_rows + def do_something_with(_): + pass + + # [START client_list_jobs] + job_iterator = client.list_jobs() # API request(s) + for job in job_iterator: + do_something_with(job) + # [END client_list_jobs] if __name__ == '__main__': diff --git a/docs/bigquery/table.rst b/docs/bigquery/table.rst deleted file mode 100644 index 713f6116c932..000000000000 --- a/docs/bigquery/table.rst +++ /dev/null @@ -1,6 +0,0 @@ -Tables -~~~~~~ - -.. automodule:: google.cloud.bigquery.table - :members: - :show-inheritance: diff --git a/docs/bigquery/usage.rst b/docs/bigquery/usage.rst index 00908f8a780a..fe701f15106a 100644 --- a/docs/bigquery/usage.rst +++ b/docs/bigquery/usage.rst @@ -5,12 +5,8 @@ BigQuery :maxdepth: 2 :hidden: - client - dataset - job - query - schema - table + reference + dbapi Authentication / Configuration ------------------------------ @@ -50,10 +46,10 @@ To override the project inferred from the environment, pass an explicit ``project`` to the constructor, or to either of the alternative ``classmethod`` factories: - .. code-block:: python +.. code-block:: python - >>> from google.cloud import bigquery - >>> client = bigquery.Client(project='PROJECT_ID') + >>> from google.cloud import bigquery + >>> client = bigquery.Client(project='PROJECT_ID') Project ACLs @@ -61,7 +57,7 @@ Project ACLs Each project has an access control list granting reader / writer / owner permission to one or more entities. 
This list cannot be queried or set -via the API: it must be managed using the Google Developer Console. +via the API; it must be managed using the Google Developer Console. Datasets @@ -76,6 +72,9 @@ policies to tables as they are created: - A default table expiration period. If set, tables created within the dataset will have the value as their expiration period. +See BigQuery documentation for more information on +`Datasets `_. + Dataset operations ~~~~~~~~~~~~~~~~~~ @@ -89,401 +88,175 @@ List datasets for the client's project: Create a new dataset for the client's project: .. literalinclude:: snippets.py - :start-after: [START dataset_create] - :end-before: [END dataset_create] - -Check for the existence of a dataset: - -.. literalinclude:: snippets.py - :start-after: [START dataset_exists] - :end-before: [END dataset_exists] + :start-after: [START create_dataset] + :end-before: [END create_dataset] Refresh metadata for a dataset (to pick up changes made by another client): .. literalinclude:: snippets.py - :start-after: [START dataset_reload] - :end-before: [END dataset_reload] + :start-after: [START get_dataset] + :end-before: [END get_dataset] -Patch metadata for a dataset: +Update a property in a dataset's metadata: .. literalinclude:: snippets.py - :start-after: [START dataset_patch] - :end-before: [END dataset_patch] - -Replace the ACL for a dataset, and update all writeable fields: + :start-after: [START update_dataset_simple] + :end-before: [END update_dataset_simple] -.. code-block:: python +Update multiple properties in a dataset's metadata: - >>> from google.cloud import bigquery - >>> client = bigquery.Client() - >>> dataset = client.dataset('dataset_name') - >>> dataset.get() # API request - >>> acl = list(dataset.acl) - >>> acl.append(bigquery.Access(role='READER', entity_type='domain', entity='example.com')) - >>> dataset.acl = acl - >>> dataset.update() # API request +.. literalinclude:: snippets.py + :start-after: [START update_dataset_multiple_properties] + :end-before: [END update_dataset_multiple_properties] Delete a dataset: .. literalinclude:: snippets.py - :start-after: [START dataset_delete] - :end-before: [END dataset_delete] + :start-after: [START delete_dataset] + :end-before: [END delete_dataset] Tables ------ -Tables exist within datasets. List tables for the dataset: +Tables exist within datasets. See BigQuery documentation for more information +on `Tables `_. + +Table operations +~~~~~~~~~~~~~~~~~~ +List tables for the dataset: .. literalinclude:: snippets.py - :start-after: [START dataset_list_tables] - :end-before: [END dataset_list_tables] + :start-after: [START list_dataset_tables] + :end-before: [END list_dataset_tables] Create a table: .. literalinclude:: snippets.py - :start-after: [START table_create] - :end-before: [END table_create] + :start-after: [START create_table] + :end-before: [END create_table] -Check for the existence of a table: +Get a table: .. literalinclude:: snippets.py - :start-after: [START table_exists] - :end-before: [END table_exists] + :start-after: [START get_table] + :end-before: [END get_table] -Refresh metadata for a table (to pick up changes made by another client): +Update a property in a table's metadata: .. literalinclude:: snippets.py - :start-after: [START table_reload] - :end-before: [END table_reload] + :start-after: [START update_table_simple] + :end-before: [END update_table_simple] -Patch specific properties for a table: +Update multiple properties in a table's metadata: .. 
literalinclude:: snippets.py - :start-after: [START table_patch] - :end-before: [END table_patch] + :start-after: [START update_table_multiple_properties] + :end-before: [END update_table_multiple_properties] -Update all writable metadata for a table +Get rows from a table's data: .. literalinclude:: snippets.py - :start-after: [START table_update] - :end-before: [END table_update] + :start-after: [START table_list_rows] + :end-before: [END table_list_rows] -Get rows from a table's data: +Utilize iterator properties returned with row data: .. literalinclude:: snippets.py - :start-after: [START table_fetch_data] - :end-before: [END table_fetch_data] + :start-after: [START table_list_rows_iterator_properties] + :end-before: [END table_list_rows_iterator_properties] Insert rows into a table's data: .. literalinclude:: snippets.py - :start-after: [START table_insert_data] - :end-before: [END table_insert_data] + :start-after: [START table_create_rows] + :end-before: [END table_create_rows] Upload table data from a file: .. literalinclude:: snippets.py - :start-after: [START table_upload_from_file] - :end-before: [END table_upload_from_file] + :start-after: [START load_table_from_file] + :end-before: [END load_table_from_file] -Delete a table: +Load table data from Google Cloud Storage: .. literalinclude:: snippets.py - :start-after: [START table_delete] - :end-before: [END table_delete] - - -Jobs ----- - -Jobs describe actions peformed on data in BigQuery tables: - -- Load data into a table -- Run a query against data in one or more tables -- Extract data from a table -- Copy a table + :start-after: [START load_table_from_uri] + :end-before: [END load_table_from_uri] -List jobs for a project: +Copy a table: .. literalinclude:: snippets.py - :start-after: [START client_list_jobs] - :end-before: [END client_list_jobs] + :start-after: [START copy_table] + :end-before: [END copy_table] - -Querying data (synchronous) -~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -Run a query which can be expected to complete within bounded time: +Extract a table to Google Cloud Storage: .. literalinclude:: snippets.py - :start-after: [START client_run_sync_query] - :end-before: [END client_run_sync_query] + :start-after: [START extract_table] + :end-before: [END extract_table] -Run a query using a named query parameter: +Delete a table: .. literalinclude:: snippets.py - :start-after: [START client_run_sync_query_w_param] - :end-before: [END client_run_sync_query_w_param] + :start-after: [START delete_table] + :end-before: [END delete_table] -If the rows returned by the query do not fit into the initial response, -then we need to fetch the remaining rows via -:meth:`~google.cloud.bigquery.query.QueryResults.fetch_data`: -.. literalinclude:: snippets.py - :start-after: [START client_run_sync_query_paged] - :end-before: [END client_run_sync_query_paged] +Queries +------- -If the query takes longer than the timeout allowed, ``query.complete`` -will be ``False``. In that case, we need to poll the associated job until -it is done, and then fetch the results: +Querying data +~~~~~~~~~~~~~ .. literalinclude:: snippets.py - :start-after: [START client_run_sync_query_timeout] - :end-before: [END client_run_sync_query_timeout] - - -Querying data (asynchronous) -~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -Background a query, loading the results into a table: - -.. 
code-block:: python - - >>> from google.cloud import bigquery - >>> client = bigquery.Client() - >>> query = """\ - SELECT firstname + ' ' + last_name AS full_name, - FLOOR(DATEDIFF(CURRENT_DATE(), birth_date) / 365) AS age - FROM dataset_name.persons - """ - >>> dataset = client.dataset('dataset_name') - >>> table = dataset.table(name='person_ages') - >>> job = client.run_async_query('fullname-age-query-job', query) - >>> job.destination = table - >>> job.write_disposition= 'WRITE_TRUNCATE' - >>> job.name - 'fullname-age-query-job' - >>> job.job_type - 'query' - >>> job.created - None - >>> job.state - None - -.. note:: - - - The ``created`` and ``state`` fields are not set until the job - is submitted to the BigQuery back-end. - -Then, begin executing the job on the server: - -.. code-block:: python - - >>> job.begin() # API call - >>> job.created - datetime.datetime(2015, 7, 23, 9, 30, 20, 268260, tzinfo=) - >>> job.state - 'RUNNING' - -Poll until the job is complete: - -.. code-block:: python - - >>> import time - >>> retry_count = 100 - >>> while retry_count > 0 and job.state != 'DONE': - ... retry_count -= 1 - ... time.sleep(10) - ... job.reload() # API call - >>> job.state - 'done' - >>> job.ended - datetime.datetime(2015, 7, 23, 9, 30, 21, 334792, tzinfo=) - -Retrieve the results: - -.. code-block:: python - - >>> results = job.results() - >>> rows, total_count, token = query.fetch_data() # API request - >>> while True: - ... do_something_with(rows) - ... if token is None: - ... break - ... rows, total_count, token = query.fetch_data( - ... page_token=token) # API request - - -Inserting data (asynchronous) -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -Start a job loading data asynchronously from a set of CSV files, located on -Google Cloud Storage, appending rows into an existing table. First, create -the job locally: - -.. code-block:: python - - >>> from google.cloud import bigquery - >>> from google.cloud.bigquery import SchemaField - >>> client = bigquery.Client() - >>> table = dataset.table(name='person_ages') - >>> table.schema = [ - ... SchemaField('full_name', 'STRING', mode='required'), - ... SchemaField('age', 'INTEGER', mode='required')] - >>> job = client.load_table_from_storage( - ... 'load-from-storage-job', table, 'gs://bucket-name/object-prefix*') - >>> job.source_format = 'CSV' - >>> job.skip_leading_rows = 1 # count of skipped header rows - >>> job.write_disposition = 'WRITE_TRUNCATE' - >>> job.name - 'load-from-storage-job' - >>> job.job_type - 'load' - >>> job.created - None - >>> job.state - None - -.. note:: - - - ``google.cloud.bigquery`` generates a UUID for each job. - - The ``created`` and ``state`` fields are not set until the job - is submitted to the BigQuery back-end. - -Then, begin executing the job on the server: - -.. code-block:: python - - >>> job.begin() # API call - >>> job.created - datetime.datetime(2015, 7, 23, 9, 30, 20, 268260, tzinfo=) - >>> job.state - 'RUNNING' - -Poll until the job is complete: - -.. code-block:: python - - >>> import time - >>> retry_count = 100 - >>> while retry_count > 0 and job.state != 'DONE': - ... retry_count -= 1 - ... time.sleep(10) - ... job.reload() # API call - >>> job.state - 'done' - >>> job.ended - datetime.datetime(2015, 7, 23, 9, 30, 21, 334792, tzinfo=) - - -Exporting data (async) -~~~~~~~~~~~~~~~~~~~~~~ - -Start a job exporting a table's data asynchronously to a set of CSV files, -located on Google Cloud Storage. First, create the job locally: - -.. 
code-block:: python
-
-   >>> from google.cloud import bigquery
-   >>> client = bigquery.Client()
-   >>> table = dataset.table(name='person_ages')
-   >>> job = client.extract_table_to_storage(
-   ...     'extract-person-ages-job', table,
-   ...     'gs://bucket-name/export-prefix*.csv')
-   ... job.destination_format = 'CSV'
-   ... job.print_header = True
-   ... job.write_disposition = 'WRITE_TRUNCATE'
-   >>> job.name
-   'extract-person-ages-job'
-   >>> job.job_type
-   'extract'
-   >>> job.created
-   None
-   >>> job.state
-   None
+   :start-after: [START client_query]
+   :end-before: [END client_query]

 .. note::

-   - ``google.cloud.bigquery`` generates a UUID for each job.
-   - The ``created`` and ``state`` fields are not set until the job
-     is submitted to the BigQuery back-end.
+   - Use of the ``timeout`` parameter is optional. The query will continue to
+     run in the background even if it takes longer than the timeout allowed.

-Then, begin executing the job on the server:

-.. code-block:: python
-
-   >>> job.begin() # API call
-   >>> job.created
-   datetime.datetime(2015, 7, 23, 9, 30, 20, 268260, tzinfo=<UTC>)
-   >>> job.state
-   'RUNNING'

+Run a query using a named query parameter
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

-Poll until the job is complete:
-
-.. code-block:: python
-
-   >>> import time
-   >>> retry_count = 100
-   >>> while retry_count > 0 and job.state != 'DONE':
-   ...     retry_count -= 1
-   ...     time.sleep(10)
-   ...     job.reload() # API call
-   >>> job.state
-   'done'
-   >>> job.ended
-   datetime.datetime(2015, 7, 23, 9, 30, 21, 334792, tzinfo=<UTC>)

+See BigQuery documentation for more information on
+`parameterized queries <https://cloud.google.com/bigquery/docs/parameterized-queries>`_.
+
+.. literalinclude:: snippets.py
+   :start-after: [START client_query_w_param]
+   :end-before: [END client_query_w_param]

-Copy tables (async)
-~~~~~~~~~~~~~~~~~~~

-First, create the job locally:
+Querying Table Rows
+~~~~~~~~~~~~~~~~~~~

-.. code-block:: python
+Run a query and wait for it to finish:

-
-   >>> from google.cloud import bigquery
-   >>> client = bigquery.Client()
-   >>> source_table = dataset.table(name='person_ages')
-   >>> destination_table = dataset.table(name='person_ages_copy')
-   >>> job = client.copy_table(
-   ...     'copy-table-job', destination_table, source_table)
-   >>> job.name
-   'copy-table-job'
-   >>> job.job_type
-   'copy'
-   >>> job.created
-   None
-   >>> job.state
-   None
+.. literalinclude:: snippets.py
+   :start-after: [START client_query_rows]
+   :end-before: [END client_query_rows]

 .. note::

-   - ``google.cloud.bigquery`` generates a UUID for each job.
-   - The ``created`` and ``state`` fields are not set until the job
-     is submitted to the BigQuery back-end.
-
-Then, begin executing the job on the server:
+   - Use of the ``timeout`` parameter is optional. The query will continue to
+     run in the background even if it takes longer than the timeout allowed.
+     The job may be retrieved using the job ID via
+     :meth:`~google.cloud.bigquery.client.Client.get_job`.

-.. code-block:: python
-   >>> job.begin() # API call
-   >>> job.created
-   datetime.datetime(2015, 7, 23, 9, 30, 20, 268260, tzinfo=<UTC>)
-   >>> job.state
-   'RUNNING'

+List jobs for a project
+~~~~~~~~~~~~~~~~~~~~~~~

-Poll until the job is complete:
+Jobs describe actions performed on data in BigQuery tables:

-.. code-block:: python
+- Load data into a table
+- Run a query against data in one or more tables
+- Extract data from a table
+- Copy a table

-   >>> import time
-   >>> retry_count = 100
-   >>> while retry_count > 0 and job.state != 'DONE':
-   ...     retry_count -= 1
-   ...     time.sleep(10)
-   ...
job.reload() # API call - >>> job.state - 'done' - >>> job.ended - datetime.datetime(2015, 7, 23, 9, 30, 21, 334792, tzinfo=) +.. literalinclude:: snippets.py + :start-after: [START client_list_jobs] + :end-before: [END client_list_jobs]
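
Taken together, the dataset operations above form a short lifecycle: create, refresh, update,
delete. The following is a minimal sketch against the client methods the snippets exercise;
the dataset ID is a placeholder, and the field-list form of ``update_dataset`` is assumed from
the update snippets.

.. code-block:: python

   from google.cloud import bigquery

   client = bigquery.Client()

   dataset_ref = client.dataset('my_new_dataset')  # placeholder dataset ID
   dataset = bigquery.Dataset(dataset_ref)
   dataset.description = 'created from the usage guide'
   dataset = client.create_dataset(dataset)  # API request

   # Refresh the metadata and change a single property.
   dataset = client.get_dataset(dataset_ref)  # API request
   dataset.description = 'updated description'
   dataset = client.update_dataset(dataset, ['description'])  # API request

   client.delete_dataset(dataset)  # API request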
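
A similar end-to-end sketch for tables combines creating a table with a schema, streaming rows
into it, and reading rows back. The dataset and table IDs are placeholders; ``create_rows`` is
assumed to return a list of per-row insert errors (empty on success).

.. code-block:: python

   from google.cloud import bigquery
   from google.cloud.bigquery import SchemaField

   client = bigquery.Client()

   schema = [
       SchemaField('full_name', 'STRING', mode='required'),
       SchemaField('age', 'INTEGER', mode='required'),
   ]
   table_ref = client.dataset('my_dataset').table('person_ages')
   table = bigquery.Table(table_ref, schema=schema)
   table = client.create_table(table)  # API request

   rows_to_insert = [
       ('Phred Phlyntstone', 32),
       ('Wylma Phlyntstone', 29),
   ]
   errors = client.create_rows(table, rows_to_insert)  # API request
   assert errors == []

   # Streamed rows can take a moment to become visible to reads.
   for row in client.list_rows(table, max_results=10):  # API request(s)
       print(row)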
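
Loading from Cloud Storage and copying the result can be chained through their jobs. A sketch
assuming a CSV object already exists at the placeholder URI ``gs://my-bucket/person_ages.csv``
and includes a header row:

.. code-block:: python

   from google.cloud import bigquery

   client = bigquery.Client()
   dataset_ref = client.dataset('my_dataset')  # placeholder dataset ID

   load_config = bigquery.LoadJobConfig()
   load_config.skip_leading_rows = 1
   load_config.schema = [
       bigquery.SchemaField('full_name', 'STRING', mode='required'),
       bigquery.SchemaField('age', 'INTEGER', mode='required'),
   ]
   load_job = client.load_table_from_uri(
       'gs://my-bucket/person_ages.csv',  # placeholder source URI
       dataset_ref.table('person_ages'),
       job_config=load_config)  # API request
   load_job.result()  # Waits for the load to complete.

   # Copy the loaded table to a second table in the same dataset.
   copy_job = client.copy_table(
       dataset_ref.table('person_ages'),
       dataset_ref.table('person_ages_copy'))  # API request
   copy_job.result()  # Waits for the copy to complete.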
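
Exporting a table to Cloud Storage and then deleting it follows the same job-based pattern as
the ``extract_table`` and ``delete_table`` snippets. A minimal sketch; the bucket (which must
already exist) and the table are placeholders:

.. code-block:: python

   from google.cloud import bigquery
   from google.cloud.exceptions import NotFound

   client = bigquery.Client()
   table_ref = client.dataset('my_dataset').table('person_ages')

   destination_uri = 'gs://my-bucket/person_ages_out.csv'  # placeholder
   extract_job = client.extract_table(table_ref, destination_uri)  # API request
   extract_job.result(timeout=100)  # Waits for the export to complete.

   client.delete_table(table_ref)  # API request
   try:
       client.get_table(table_ref)  # API request
   except NotFound:
       print('table deleted')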
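
The parameterized query and the job-lookup behavior described in the notes can be combined into
one sketch. The 30-second timeout is illustrative; fetching the job again by ``job_id`` via
``get_job`` is assumed from the method referenced in the note above.

.. code-block:: python

   from google.cloud import bigquery

   client = bigquery.Client()

   QUERY_W_PARAM = (
       'SELECT name FROM `bigquery-public-data.usa_names.usa_1910_2013` '
       'WHERE state = @state '
       'LIMIT 100')
   job_config = bigquery.QueryJobConfig()
   job_config.query_parameters = [
       bigquery.ScalarQueryParameter('state', 'STRING', 'TX'),
   ]
   query_job = client.query(QUERY_W_PARAM, job_config=job_config)  # API request

   # result() blocks; the timeout bounds only the local wait, and the job
   # keeps running server-side if the wait expires.
   rows = list(query_job.result(timeout=30))
   print(len(rows))

   # The same job can be fetched again later by its ID.
   same_job = client.get_job(query_job.job_id)  # API request
   assert same_job.state == 'DONE'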
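
Listing jobs is a plain iteration over the client. A minimal sketch that prints a few
identifying properties; ``max_results`` is optional and only bounds the example output.

.. code-block:: python

   from google.cloud import bigquery

   client = bigquery.Client()

   for job in client.list_jobs(max_results=10):  # API request(s)
       print(job.job_id, job.job_type, job.state)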